bio-table 0.0.2 → 0.0.3
Sign up to get free protection for your applications and to get access to all the features.
- data/README.md +47 -17
- data/Rakefile +1 -1
- data/VERSION +1 -1
- data/bin/bio-table +39 -4
- data/features/iterate_table.feature +16 -0
- data/features/{bio-table-csv-reader-feature.rb → step_definitions/bio-table-csv-reader-feature.rb} +0 -0
- data/features/step_definitions/iterate_table.rb +11 -0
- data/lib/bio-table.rb +2 -0
- data/lib/bio-table/filter.rb +17 -1
- data/lib/bio-table/overlap.rb +25 -0
- data/lib/bio-table/table.rb +19 -35
- data/lib/bio-table/table_apply.rb +49 -0
- data/lib/bio-table/tableload.rb +39 -0
- data/lib/bio-table/tablerow.rb +5 -0
- data/lib/bio-table/tablewriter.rb +10 -0
- data/lib/bio-table/validator.rb +4 -4
- metadata +25 -21
data/README.md
CHANGED
@@ -2,14 +2,14 @@
|
|
2
2
|
|
3
3
|
[![Build Status](https://secure.travis-ci.org/pjotrp/bioruby-table.png)](http://travis-ci.org/pjotrp/bioruby-table)
|
4
4
|
|
5
|
-
bio-table is the swiss knife of tabular data.
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
pipe-line setup.
|
5
|
+
bio-table is the Swiss army knife of tabular data. Tables of data are
|
6
|
+
often used in bioinformatics, especially in conjunction with Excel
|
7
|
+
spreadsheets and DB queries. This biogem contains support for reading
|
8
|
+
tables, writing tables, and manipulation of rows and columns, both
|
9
|
+
using a command line interface and through a Ruby library. If you
|
10
|
+
don't like R dataframes, maybe you like this. Also, because bio-table
|
11
|
+
is command line driven, and can use STDIN and STDOUT, it easily fits
|
12
|
+
in a pipe-line setup.
|
13
13
|
|
14
14
|
Quick example, say we want to filter out rows that contain certain
|
15
15
|
p-values listed in the 4th column:
|
@@ -18,11 +18,24 @@ p-values listed in the 4th column:
|
|
18
18
|
bio-table test/data/input/table1.csv --num-filter "values[3] <= 0.05"
|
19
19
|
```
|
20
20
|
|
21
|
-
bio-table should be lazy
|
22
|
-
|
23
|
-
|
21
|
+
bio-table should be lazy and good for big data: bio-table is
|
22
|
+
designed so that most important functions do not load the data in
|
23
|
+
memory. The library supports a functional style of programming, but
|
24
|
+
you don't need to know Ruby to use the command line interface (CLI).
|
24
25
|
|
25
|
-
|
26
|
+
Features:
|
27
|
+
|
28
|
+
* Support for TAB and CSV files
|
29
|
+
* Filter on data
|
30
|
+
* Transform table and data by column
|
31
|
+
* Recalculate data
|
32
|
+
* Diff between tables, selecting on specific column values
|
33
|
+
* Merge tables side by side
|
34
|
+
* Split tables by column
|
35
|
+
* Read from STDIN, write to STDOUT
|
36
|
+
|
37
|
+
Note: this software is under active development, though it should just
|
38
|
+
work.
|
26
39
|
|
27
40
|
## Installation
|
28
41
|
|
@@ -100,12 +113,13 @@ To reorder/reduce table columns by name
|
|
100
113
|
bio-table test/data/input/table1.csv --columns AJ,B6,Axb1,Axb4,AXB13,Axb15,Axb19 > test1a.tab
|
101
114
|
```
|
102
115
|
|
103
|
-
or use their index numbers
|
116
|
+
or use their index numbers (the first column is zero)
|
104
117
|
|
105
118
|
```
|
106
119
|
bio-table test/data/input/table1.csv --columns 0,1,8,2,4,6 > test1a.tab
|
107
120
|
```
|
108
121
|
|
122
|
+
|
109
123
|
To filter for columns using a regular expression
|
110
124
|
|
111
125
|
```
|
@@ -169,7 +183,10 @@ with rownames that appear in table2, but not in table1)
|
|
169
183
|
|
170
184
|
bio-table --diff 0 table1.csv table2.csv
|
171
185
|
|
172
|
-
|
186
|
+
bio-table --diff is different from the standard Unix diff tool. The
|
187
|
+
latter shows insertions and deletions. bio-table --diff shows what is
|
188
|
+
in one file, and not in the other (insertions). To see deletions,
|
189
|
+
reverse the file order, i.e. switch the file names.
|
173
190
|
|
174
191
|
bio-table --diff 0 table1.csv table2.csv
|
175
192
|
|
@@ -177,19 +194,32 @@ To diff on something else
|
|
177
194
|
|
178
195
|
bio-table --diff 0,3 table2.csv table1.csv
|
179
196
|
|
180
|
-
creates a
|
197
|
+
creates a key using columns 0 and 3 (0 is the rownames column).
|
181
198
|
|
182
199
|
Similarly
|
183
200
|
|
184
201
|
bio-table --overlap 2 table1.csv table2.csv
|
185
202
|
|
186
|
-
finds the overlapping rows, based on column 2
|
203
|
+
finds the overlapping rows, based on the content of column 2.
|
204
|
+
|
187
205
|
|
188
206
|
### Different parsers
|
189
207
|
|
190
208
|
more soon
|
191
209
|
|
192
|
-
|
210
|
+
### Using STDIN
|
211
|
+
|
212
|
+
bio-table can read data from STDIN, by simply assuming that the data
|
213
|
+
piped in is the first input file
|
214
|
+
|
215
|
+
```
|
216
|
+
cat test1.tab | bio-table table1.csv --num-filter "values[3] <= 0.05" > test1a.tab
|
217
|
+
```
|
218
|
+
|
219
|
+
will filter both files test1.tab and table1.csv and output to
|
220
|
+
test1a.tab.
|
221
|
+
|
222
|
+
## bio-table API (for Ruby programming)
|
193
223
|
|
194
224
|
```ruby
|
195
225
|
require 'bio-table'
|
data/Rakefile
CHANGED
@@ -17,7 +17,7 @@ Jeweler::Tasks.new do |gem|
|
|
17
17
|
gem.name = "bio-table"
|
18
18
|
gem.homepage = "http://github.com/pjotrp/bioruby-table"
|
19
19
|
gem.license = "MIT"
|
20
|
-
gem.summary = %Q{Swiss knife of tabulated data; transforming/filtering tab/csv files}
|
20
|
+
gem.summary = %Q{Swiss army knife of tabulated data; transforming/filtering tab/csv files}
|
21
21
|
gem.description = %Q{Functions and tools for tranforming and changing tab delimited and comma separated table files - useful for Excel sheets and SQL/RDF output}
|
22
22
|
gem.email = "pjotr.public01@thebird.nl"
|
23
23
|
gem.authors = ["Pjotr Prins"]
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.0.
|
1
|
+
0.0.3
|
data/bin/bio-table
CHANGED
@@ -9,6 +9,8 @@ $: << File.join(rootpath,'lib')
|
|
9
9
|
|
10
10
|
_VERSION = File.new(File.join(rootpath,'VERSION')).read.chomp
|
11
11
|
|
12
|
+
INPUT_ON_STDIN = !$stdin.tty?
|
13
|
+
|
12
14
|
$stderr.print "bio-table "+_VERSION+" Copyright (C) 2012 Pjotr Prins <pjotr.prins@thebird.nl>\n\n"
|
13
15
|
|
14
16
|
USAGE =<<EOU
|
@@ -17,7 +19,7 @@ bio-table transforms, filters and reorders table files (CSV, tab-delimited).
|
|
17
19
|
|
18
20
|
EOU
|
19
21
|
|
20
|
-
if ARGV.size == 0
|
22
|
+
if ARGV.size == 0 and not INPUT_ON_STDIN
|
21
23
|
print USAGE
|
22
24
|
end
|
23
25
|
|
@@ -32,7 +34,7 @@ Bio::Log::CLI.logger('stderr')
|
|
32
34
|
Bio::Log::CLI.trace('info')
|
33
35
|
|
34
36
|
options = {show_help: false, write_header: true}
|
35
|
-
options[:show_help] = true if ARGV.size == 0
|
37
|
+
options[:show_help] = true if ARGV.size == 0 and not INPUT_ON_STDIN
|
36
38
|
opts = OptionParser.new do |o|
|
37
39
|
o.banner = "Usage: #{File.basename($0)} [options] filename\n\n"
|
38
40
|
|
@@ -73,6 +75,10 @@ opts = OptionParser.new do |o|
|
|
73
75
|
end
|
74
76
|
|
75
77
|
o.on('--overlap list',Array,'Find overlap of two input files on columns)') do |l|
|
78
|
+
if l.size==1 and File.exist?(l.first)
|
79
|
+
ARGV.unshift l.first
|
80
|
+
l = ["0"]
|
81
|
+
end
|
76
82
|
options[:overlap] = l
|
77
83
|
end
|
78
84
|
|
@@ -146,6 +152,11 @@ end
|
|
146
152
|
|
147
153
|
if options[:overlap]
|
148
154
|
logger.warn "Column settings are ignored for --overlap" if options[:columns]
|
155
|
+
logger.warn "Ignoring extraneaous files "+ARGV[2..-1].join(",") if ARGV.size>2
|
156
|
+
t1 = TableReader::read_file(ARGV[0], options)
|
157
|
+
t2 = TableReader::read_file(ARGV[1], options)
|
158
|
+
t = Overlap::overlap_tables(t1,t2, options)
|
159
|
+
t.write(options)
|
149
160
|
exit
|
150
161
|
end
|
151
162
|
|
@@ -159,8 +170,32 @@ if options[:merge]
|
|
159
170
|
exit
|
160
171
|
end
|
161
172
|
|
173
|
+
#
|
174
|
+
# We also support STDIN for the first 'file'. A non-blocking idea can
|
175
|
+
# be found here:
|
176
|
+
#
|
177
|
+
# http://eric.lubow.org/2010/ruby/multiple-input-locations-from-bash-into-ruby/
|
178
|
+
#
|
179
|
+
|
180
|
+
writer = BioTable::TableWriter::Writer.new(options[:format])
|
181
|
+
|
182
|
+
if INPUT_ON_STDIN
|
183
|
+
opts = options.dup # so we can modify options
|
184
|
+
BioTable::TableLoader.emit(STDIN, opts).each do |row|
|
185
|
+
writer.write(TableRow.new(row[0],row[1..-1]))
|
186
|
+
end
|
187
|
+
options[:write_header] = false # don't write the header for chained files
|
188
|
+
end
|
189
|
+
|
162
190
|
ARGV.each do | fn |
|
163
|
-
|
164
|
-
|
191
|
+
opts = options.dup # so we can modify options
|
192
|
+
f = File.open(fn,"r")
|
193
|
+
if not opts[:in_format] and fn =~ /\.csv$/
|
194
|
+
logger.debug "Autodetected CSV file"
|
195
|
+
opts[:in_format] = :csv
|
196
|
+
end
|
197
|
+
BioTable::TableLoader.emit(f, opts).each do |row|
|
198
|
+
writer.write(TableRow.new(row[0],row[1..-1]))
|
199
|
+
end
|
165
200
|
options[:write_header] = false # don't write the header for chained files
|
166
201
|
end
|
@@ -0,0 +1,16 @@
|
|
1
|
+
Feature: Iterate through an input table
|
2
|
+
|
3
|
+
bio-table should read input line by line as an iterator, and emit
|
4
|
+
filtered/transformed output
|
5
|
+
|
6
|
+
Scenario: Iterate through a table
|
7
|
+
Given I want to iterate a comma separated table
|
8
|
+
"""
|
9
|
+
#Gene,AJ,B6,Axb1,Axb2,Axb4,Axb12,AXB13,Axb15,Axb19,Axb23,Axb24,Bxa1,Bxa2,Bxa4,Bxa7,Bxa8,Bxa12,Bxa11,Bxa13,Bxa15,Bxa16,Axb5,Axb6,Axb8,Axb1,Bxa24,Bxa25,Bxa26,gene_symbol,gene_desc
|
10
|
+
105853,0.06,0,0,0,0,0,0.11,0,0,0,,,0,0,0,0,0,0,0,0,0,0,0,0,,0,,,Mal2,"MAL2 proteolipid protein"
|
11
|
+
105855,236.88,213.95,213.15,253.49,198,231.56,200.96,255.2,214.04,231.46,,,233.23,241.26,237.53,171.87,237.13,162.3,252.13,284.85,188.76,253.43,220.15,305.52,,217.42,,,Nckap1l,"NCK associated protein 1 like,NCK associated protein 1 like,"
|
12
|
+
105859,0,0.14,0,0,0.07,0.04,0,0,0,0,,,0.02,0,0,0,0,0,0.06,0,0,0,0.02,0,,0,,,Csdc2,"RNA-binding protein pippin"
|
13
|
+
105866,0,0,0,0,0,0,0,0,0,0,,,0,0,0,0,0,0,0,0,0,0,0,0,,0,,,Krt72,"keratin 72"
|
14
|
+
"""
|
15
|
+
When I iterate through the table
|
16
|
+
|
data/features/{bio-table-csv-reader-feature.rb → step_definitions/bio-table-csv-reader-feature.rb}
RENAMED
File without changes
|
@@ -0,0 +1,11 @@
|
|
1
|
+
Given /^I want to iterate a comma separated table$/ do |string|
|
2
|
+
@lines = string.split(/\n/).map { |s| s.strip }
|
3
|
+
end
|
4
|
+
|
5
|
+
When /^I iterate through the table$/ do
|
6
|
+
res = []
|
7
|
+
BioTable::TableLoader.emit(@lines, :in_format => :csv).each { |row| res << row }
|
8
|
+
res[3][5].should == "0.07"
|
9
|
+
end
|
10
|
+
|
11
|
+
|
data/lib/bio-table.rb
CHANGED
@@ -18,8 +18,10 @@ require 'bio-table/parser.rb'
|
|
18
18
|
require 'bio-table/formatter.rb'
|
19
19
|
require 'bio-table/tablerow.rb'
|
20
20
|
require 'bio-table/table.rb'
|
21
|
+
require 'bio-table/tableload.rb'
|
21
22
|
require 'bio-table/tablereader.rb'
|
22
23
|
require 'bio-table/tablewriter.rb'
|
24
|
+
require 'bio-table/table_apply.rb'
|
23
25
|
require 'bio-table/diff.rb'
|
24
26
|
require 'bio-table/overlap.rb'
|
25
27
|
require 'bio-table/merge.rb'
|
data/lib/bio-table/filter.rb
CHANGED
@@ -1,5 +1,20 @@
|
|
1
1
|
module BioTable
|
2
2
|
|
3
|
+
class LazyValues
|
4
|
+
def initialize fields
|
5
|
+
@fields = fields
|
6
|
+
@values = [] # cache values
|
7
|
+
end
|
8
|
+
|
9
|
+
def [] index
|
10
|
+
if not @values[index]
|
11
|
+
field = @fields[index]
|
12
|
+
@values[index] = (Filter::valid_number?(field) ? field.to_f : nil )
|
13
|
+
end
|
14
|
+
@values[index]
|
15
|
+
end
|
16
|
+
end
|
17
|
+
|
3
18
|
module Filter
|
4
19
|
|
5
20
|
# Create an index to the column headers, so header A,B,C,D with columns
|
@@ -54,7 +69,8 @@ module BioTable
|
|
54
69
|
def Filter::numeric code, fields
|
55
70
|
return true if code == nil
|
56
71
|
if fields
|
57
|
-
values = fields.map { |field| (valid_number?(field) ? field.to_f : nil ) } # FIXME: not so lazy
|
72
|
+
# values = fields.map { |field| (valid_number?(field) ? field.to_f : nil ) } # FIXME: not so lazy
|
73
|
+
values = LazyValues.new(fields)
|
58
74
|
begin
|
59
75
|
eval(code)
|
60
76
|
rescue Exception
|
data/lib/bio-table/overlap.rb
CHANGED
@@ -1 +1,26 @@
|
|
1
1
|
# Overlap module
|
2
|
+
#
|
3
|
+
module BioTable
|
4
|
+
|
5
|
+
module Overlap
|
6
|
+
|
7
|
+
def self.overlap_tables t1, t2, options
|
8
|
+
logger = Bio::Log::LoggerPlus['bio-table']
|
9
|
+
columns = Columns::to_list(options[:overlap])
|
10
|
+
t = Table.new(t1.header)
|
11
|
+
l1 = t1.map { |row| columns.map { |i| row.all_fields[i] } }
|
12
|
+
l2 = t2.map { |row| columns.map { |i| row.all_fields[i] } }
|
13
|
+
logger.warn "Not all selected keys are unique!" if l1.uniq.size != l1.size or l2.uniq.size != l2.size
|
14
|
+
overlap = l2 & l1
|
15
|
+
# p overlap
|
16
|
+
# create index for table 2
|
17
|
+
idx2 = Indexer::create_index(t2,columns)
|
18
|
+
overlap.each do |values|
|
19
|
+
t.push(t2.row_by_columns(columns.zip(values),idx2))
|
20
|
+
end
|
21
|
+
t
|
22
|
+
end
|
23
|
+
|
24
|
+
end
|
25
|
+
|
26
|
+
end
|
data/lib/bio-table/table.rb
CHANGED
@@ -1,5 +1,7 @@
|
|
1
1
|
module BioTable
|
2
2
|
|
3
|
+
# In memory table representation - note that the default parser/emitter does not
|
4
|
+
# use this class as it expects all data to be in memory.
|
3
5
|
class Table
|
4
6
|
|
5
7
|
include Enumerable
|
@@ -19,45 +21,27 @@ module BioTable
|
|
19
21
|
@name = File.basename(fn,File.extname(fn))
|
20
22
|
end
|
21
23
|
|
22
|
-
# Read lines (list of string) and add them to the table, setting row
|
23
|
-
# and row fields. The first row is assumed to be the header and
|
24
|
-
# header has been set.
|
24
|
+
# Read lines (list/array of string) and add them to the table, setting row
|
25
|
+
# names and row fields. The first row is assumed to be the header and
|
26
|
+
# ignored if the header has been set.
|
25
27
|
|
26
28
|
def read_lines lines, options = {}
|
27
|
-
|
28
|
-
num_filter = options[:num_filter]
|
29
|
-
@logger.debug "Filtering on #{num_filter}" if num_filter
|
30
|
-
rewrite = options[:rewrite]
|
31
|
-
@logger.debug "Rewrite #{rewrite}" if rewrite
|
32
|
-
use_columns = options[:columns]
|
33
|
-
@logger.debug "Filtering on columns #{use_columns}" if use_columns
|
34
|
-
column_filter = options[:column_filter]
|
35
|
-
@logger.debug "Filtering on column names #{column_filter}" if column_filter
|
36
|
-
include_rownames = options[:with_rownames]
|
37
|
-
@logger.debug "Include row names" if include_rownames
|
38
|
-
first_column = (include_rownames ? 0 : 1)
|
39
|
-
|
40
|
-
# parse the header
|
41
|
-
header = LineParser::parse(lines[0], options[:in_format])
|
42
|
-
Validator::valid_header?(header, @header)
|
43
|
-
@header = header if not @header
|
29
|
+
table_apply = TableApply.new(options)
|
44
30
|
|
45
|
-
|
46
|
-
|
47
|
-
|
31
|
+
header = table_apply.parse_header(lines[0], options)
|
32
|
+
Validator::valid_header?(header, @header) # compare against older header when merging
|
33
|
+
column_index,header = table_apply.column_index(header) # we may rewrite the header
|
34
|
+
@header = header if not @header
|
48
35
|
|
49
36
|
# parse the rest
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
@rownames << rowname if not include_rownames # otherwise doubles rownames
|
60
|
-
@rows << data_fields
|
37
|
+
prev_line = @header[1..-1]
|
38
|
+
(lines[1..-1]).each_with_index do | line, line_num |
|
39
|
+
rowname, data_fields = table_apply.parse_row(line_num, line, column_index, prev_line, options)
|
40
|
+
if data_fields
|
41
|
+
@rownames << rowname if not options[:with_rownames] # otherwise doubles rownames
|
42
|
+
@rows << data_fields if data_fields
|
43
|
+
end
|
44
|
+
prev_line = data_fields
|
61
45
|
end
|
62
46
|
end
|
63
47
|
|
@@ -82,7 +66,7 @@ module BioTable
|
|
82
66
|
formatter.write(@header) if options[:write_header]
|
83
67
|
each do | tablerow |
|
84
68
|
# p tablerow
|
85
|
-
formatter.write(tablerow.all_fields) if tablerow.
|
69
|
+
formatter.write(tablerow.all_fields) if tablerow.all_valid?
|
86
70
|
end
|
87
71
|
end
|
88
72
|
|
@@ -0,0 +1,49 @@
|
|
1
|
+
module BioTable
|
2
|
+
|
3
|
+
# Apply filters/rewrite etc. to a table, visiting a row at a time. For optimization
|
4
|
+
# this class contains some state
|
5
|
+
class TableApply
|
6
|
+
|
7
|
+
def initialize options
|
8
|
+
@logger = Bio::Log::LoggerPlus['bio-table']
|
9
|
+
|
10
|
+
@num_filter = options[:num_filter]
|
11
|
+
@logger.debug "Filtering on #{@num_filter}" if @num_filter
|
12
|
+
@rewrite = options[:rewrite]
|
13
|
+
@logger.debug "Rewrite #{@rewrite}" if @rewrite
|
14
|
+
@use_columns = options[:columns]
|
15
|
+
@logger.debug "Filtering on columns #{@use_columns}" if @use_columns
|
16
|
+
@column_filter = options[:column_filter]
|
17
|
+
@logger.debug "Filtering on column names #{@column_filter}" if @column_filter
|
18
|
+
@include_rownames = options[:with_rownames]
|
19
|
+
@logger.debug "Include row names" if @include_rownames
|
20
|
+
@first_column = (@include_rownames ? 0 : 1)
|
21
|
+
end
|
22
|
+
|
23
|
+
def parse_header(line, options)
|
24
|
+
LineParser::parse(line, options[:in_format])
|
25
|
+
end
|
26
|
+
|
27
|
+
def column_index(header)
|
28
|
+
column_idx = Filter::create_column_index(@use_columns,header)
|
29
|
+
column_idx = Filter::filter_column_index(column_idx,header,@column_filter)
|
30
|
+
new_header = Filter::apply_column_filter(header,column_idx)
|
31
|
+
return column_idx, new_header
|
32
|
+
end
|
33
|
+
|
34
|
+
def parse_row(line_num, line, column_idx, last_fields, options)
|
35
|
+
fields = LineParser::parse(line, options[:in_format])
|
36
|
+
fields = Filter::apply_column_filter(fields,column_idx)
|
37
|
+
return nil,nil if fields == []
|
38
|
+
rowname = fields[0]
|
39
|
+
data_fields = fields[@first_column..-1]
|
40
|
+
if data_fields.size > 0
|
41
|
+
return nil,nil if not Validator::valid_row?(line_num, data_fields, last_fields)
|
42
|
+
return nil,nil if not Filter::numeric(@num_filter,data_fields)
|
43
|
+
(rowname, data_fields) = Rewrite::rewrite(@rewrite,rowname,data_fields)
|
44
|
+
end
|
45
|
+
return rowname, data_fields
|
46
|
+
end
|
47
|
+
end
|
48
|
+
|
49
|
+
end
|
@@ -0,0 +1,39 @@
|
|
1
|
+
module BioTable
|
2
|
+
|
3
|
+
module TableLoader
|
4
|
+
|
5
|
+
# Emit a row at a time, using generator as input (the generator should have
|
6
|
+
# an 'each' method) and apply the filters etc. defined in options
|
7
|
+
#
|
8
|
+
# Note that this class does not hold data in memory(!)
|
9
|
+
#
|
10
|
+
# Note that you need to pass in :with_header to get the header row
|
11
|
+
def TableLoader::emit generator, options
|
12
|
+
table_apply = TableApply.new(options)
|
13
|
+
column_index = nil, prev_line = nil
|
14
|
+
Enumerator.new { |yielder|
|
15
|
+
# fields = LineParser::parse(line,options[:in_format])
|
16
|
+
generator.each_with_index do |line, line_num|
|
17
|
+
# p [line_num, line]
|
18
|
+
if line_num == 0
|
19
|
+
header = table_apply.parse_header(line, options)
|
20
|
+
# Validator::valid_header?(header, @header) # compare against older header when merging
|
21
|
+
column_index,header = table_apply.column_index(header) # we may rewrite the header
|
22
|
+
yielder.yield header if options[:write_header] != false
|
23
|
+
prev_line = header[1..-1]
|
24
|
+
else
|
25
|
+
rowname, data_fields = table_apply.parse_row(line_num, line, column_index, prev_line, options)
|
26
|
+
if data_fields
|
27
|
+
list = []
|
28
|
+
list << rowname if not options[:with_rownames] # otherwise doubles rownames
|
29
|
+
list += data_fields if data_fields
|
30
|
+
yielder.yield list
|
31
|
+
prev_line = data_fields
|
32
|
+
end
|
33
|
+
end
|
34
|
+
end
|
35
|
+
}
|
36
|
+
end
|
37
|
+
|
38
|
+
end
|
39
|
+
end
|
data/lib/bio-table/tablerow.rb
CHANGED
@@ -1,5 +1,6 @@
|
|
1
1
|
module BioTable
|
2
2
|
|
3
|
+
# Abstraction of a parsed table row
|
3
4
|
class TableRow
|
4
5
|
attr_reader :rowname, :fields
|
5
6
|
def initialize rowname, fields = []
|
@@ -15,6 +16,10 @@ module BioTable
|
|
15
16
|
([@rowname] << @fields).flatten
|
16
17
|
end
|
17
18
|
|
19
|
+
def all_valid?
|
20
|
+
all_fields != nil and all_fields.size > 0
|
21
|
+
end
|
22
|
+
|
18
23
|
def valid?
|
19
24
|
fields != nil and fields.size > 0
|
20
25
|
end
|
data/lib/bio-table/validator.rb
CHANGED
@@ -12,12 +12,12 @@ module BioTable
|
|
12
12
|
true
|
13
13
|
end
|
14
14
|
|
15
|
-
def Validator::valid_row?
|
15
|
+
def Validator::valid_row? line_number, fields, last_fields
|
16
16
|
return false if fields == nil or fields.size == 0
|
17
|
-
if
|
18
|
-
p
|
17
|
+
if last_fields.size>0 and (fields.size != last_fields.size)
|
18
|
+
p last_fields
|
19
19
|
p fields
|
20
|
-
throw "Number of fields diverge in line #{
|
20
|
+
throw "Number of fields diverge in line #{line_number} (size #{fields.size}, expected #{last_fields.size})"
|
21
21
|
end
|
22
22
|
true
|
23
23
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: bio-table
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.3
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,11 +9,11 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-07-
|
12
|
+
date: 2012-07-26 00:00:00.000000000Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: bio-logger
|
16
|
-
requirement: &
|
16
|
+
requirement: &27789060 !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - ! '>='
|
@@ -21,10 +21,10 @@ dependencies:
|
|
21
21
|
version: '0'
|
22
22
|
type: :runtime
|
23
23
|
prerelease: false
|
24
|
-
version_requirements: *
|
24
|
+
version_requirements: *27789060
|
25
25
|
- !ruby/object:Gem::Dependency
|
26
26
|
name: rspec
|
27
|
-
requirement: &
|
27
|
+
requirement: &27788560 !ruby/object:Gem::Requirement
|
28
28
|
none: false
|
29
29
|
requirements:
|
30
30
|
- - ~>
|
@@ -32,10 +32,10 @@ dependencies:
|
|
32
32
|
version: 2.8.0
|
33
33
|
type: :development
|
34
34
|
prerelease: false
|
35
|
-
version_requirements: *
|
35
|
+
version_requirements: *27788560
|
36
36
|
- !ruby/object:Gem::Dependency
|
37
37
|
name: rdoc
|
38
|
-
requirement: &
|
38
|
+
requirement: &27788060 !ruby/object:Gem::Requirement
|
39
39
|
none: false
|
40
40
|
requirements:
|
41
41
|
- - ~>
|
@@ -43,10 +43,10 @@ dependencies:
|
|
43
43
|
version: '3.12'
|
44
44
|
type: :development
|
45
45
|
prerelease: false
|
46
|
-
version_requirements: *
|
46
|
+
version_requirements: *27788060
|
47
47
|
- !ruby/object:Gem::Dependency
|
48
48
|
name: cucumber
|
49
|
-
requirement: &
|
49
|
+
requirement: &27787540 !ruby/object:Gem::Requirement
|
50
50
|
none: false
|
51
51
|
requirements:
|
52
52
|
- - ! '>='
|
@@ -54,10 +54,10 @@ dependencies:
|
|
54
54
|
version: '0'
|
55
55
|
type: :development
|
56
56
|
prerelease: false
|
57
|
-
version_requirements: *
|
57
|
+
version_requirements: *27787540
|
58
58
|
- !ruby/object:Gem::Dependency
|
59
59
|
name: bundler
|
60
|
-
requirement: &
|
60
|
+
requirement: &27787060 !ruby/object:Gem::Requirement
|
61
61
|
none: false
|
62
62
|
requirements:
|
63
63
|
- - ! '>'
|
@@ -65,10 +65,10 @@ dependencies:
|
|
65
65
|
version: 1.0.0
|
66
66
|
type: :development
|
67
67
|
prerelease: false
|
68
|
-
version_requirements: *
|
68
|
+
version_requirements: *27787060
|
69
69
|
- !ruby/object:Gem::Dependency
|
70
70
|
name: jeweler
|
71
|
-
requirement: &
|
71
|
+
requirement: &25379120 !ruby/object:Gem::Requirement
|
72
72
|
none: false
|
73
73
|
requirements:
|
74
74
|
- - ~>
|
@@ -76,10 +76,10 @@ dependencies:
|
|
76
76
|
version: 1.8.3
|
77
77
|
type: :development
|
78
78
|
prerelease: false
|
79
|
-
version_requirements: *
|
79
|
+
version_requirements: *25379120
|
80
80
|
- !ruby/object:Gem::Dependency
|
81
81
|
name: bio
|
82
|
-
requirement: &
|
82
|
+
requirement: &25378640 !ruby/object:Gem::Requirement
|
83
83
|
none: false
|
84
84
|
requirements:
|
85
85
|
- - ! '>='
|
@@ -87,10 +87,10 @@ dependencies:
|
|
87
87
|
version: 1.4.2
|
88
88
|
type: :development
|
89
89
|
prerelease: false
|
90
|
-
version_requirements: *
|
90
|
+
version_requirements: *25378640
|
91
91
|
- !ruby/object:Gem::Dependency
|
92
92
|
name: rdoc
|
93
|
-
requirement: &
|
93
|
+
requirement: &25378160 !ruby/object:Gem::Requirement
|
94
94
|
none: false
|
95
95
|
requirements:
|
96
96
|
- - ~>
|
@@ -98,7 +98,7 @@ dependencies:
|
|
98
98
|
version: '3.12'
|
99
99
|
type: :development
|
100
100
|
prerelease: false
|
101
|
-
version_requirements: *
|
101
|
+
version_requirements: *25378160
|
102
102
|
description: Functions and tools for tranforming and changing tab delimited and comma
|
103
103
|
separated table files - useful for Excel sheets and SQL/RDF output
|
104
104
|
email: pjotr.public01@thebird.nl
|
@@ -118,9 +118,11 @@ files:
|
|
118
118
|
- Rakefile
|
119
119
|
- VERSION
|
120
120
|
- bin/bio-table
|
121
|
-
- features/bio-table-csv-reader-feature.rb
|
122
121
|
- features/bio-table-csv-reader.feature
|
122
|
+
- features/iterate_table.feature
|
123
|
+
- features/step_definitions/bio-table-csv-reader-feature.rb
|
123
124
|
- features/step_definitions/bio-table_steps.rb
|
125
|
+
- features/step_definitions/iterate_table.rb
|
124
126
|
- features/support/env.rb
|
125
127
|
- lib/bio-table.rb
|
126
128
|
- lib/bio-table/columns.rb
|
@@ -133,6 +135,8 @@ files:
|
|
133
135
|
- lib/bio-table/parser.rb
|
134
136
|
- lib/bio-table/rewrite.rb
|
135
137
|
- lib/bio-table/table.rb
|
138
|
+
- lib/bio-table/table_apply.rb
|
139
|
+
- lib/bio-table/tableload.rb
|
136
140
|
- lib/bio-table/tablereader.rb
|
137
141
|
- lib/bio-table/tablerow.rb
|
138
142
|
- lib/bio-table/tablewriter.rb
|
@@ -156,7 +160,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
156
160
|
version: '0'
|
157
161
|
segments:
|
158
162
|
- 0
|
159
|
-
hash: -
|
163
|
+
hash: -1964296020152295186
|
160
164
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
161
165
|
none: false
|
162
166
|
requirements:
|
@@ -168,5 +172,5 @@ rubyforge_project:
|
|
168
172
|
rubygems_version: 1.8.10
|
169
173
|
signing_key:
|
170
174
|
specification_version: 3
|
171
|
-
summary: Swiss knife of tabulated data; transforming/filtering tab/csv files
|
175
|
+
summary: Swiss army knife of tabulated data; transforming/filtering tab/csv files
|
172
176
|
test_files: []
|