csv_filter 0.1.2 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- data/VERSION +1 -1
- data/csv_filter.gemspec +1 -1
- data/lib/csv_filter.rb +52 -2
- data/spec/csv_filter_spec.rb +36 -3
- metadata +2 -2
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.1.2
|
1
|
+
0.2.0
|
data/csv_filter.gemspec
CHANGED
data/lib/csv_filter.rb
CHANGED
@@ -1,8 +1,58 @@
|
|
1
1
|
# @author Kris Luminar
|
2
|
-
# @email kris.luminar@gmail.com
|
3
2
|
# Reads a delimited text file (tab-separated by default) whose first line is a
# header row, and extracts a chosen subset of its columns.
class CsvFilter
  # Reads the header row eagerly, so a missing or unreadable file raises here
  # rather than on first use.
  #
  # @param file_path the full path to a file
  # @param separator the field delimiter (defaults to a tab)
  def initialize(file_path, separator = "\t")
    @file_path = file_path
    @separator = separator
    @num_columns = count_columns
    @header = {}
    grab_header
  end

  # Maps each column position to its (stripped) header name.
  #
  # @return [Hash] column index => column name
  def grab_header
    return @header if @header && !@header.empty?

    @header ||= {}
    fields.each_with_index do |field_name, i|
      @header[i] = field_name.strip
    end
    @header
  end

  # Finds the positions of the named columns, in header order.
  #
  # The result is computed on every call: caching a single answer would hand
  # back stale positions as soon as a different set of columns is requested.
  #
  # @param columns a column name or a (possibly nested) array of column names
  # @return [Array<Integer>] zero-based positions of the columns that exist
  def filtered_column_positions(columns)
    names = [columns].flatten
    register.select { |field, _pos| names.include?(field) }.values
  end

  # Inverse of #grab_header.
  #
  # @return [Hash] column name => column index
  def register
    grab_header.invert
  end

  # Extracts the requested columns from every data row.
  #
  # Accepts either an array of names or a variable number of names. Names that
  # are not present in the header are ignored.
  #
  # @param columns the names of the columns to keep
  # @return [Array<Hash>] one hash per data line, column name => value
  # @raise [ArgumentError] if more columns are requested than the file has
  def filter(*columns)
    names = columns.flatten
    if names.size > @num_columns
      raise ArgumentError,
            "#{names.size} columns requested but the file only has #{@num_columns}"
    end

    positions = filtered_column_positions(names)
    output = []
    # The file is reopened on each call so that repeated calls always start
    # from the top and no handle is left open afterwards.
    File.open(@file_path, 'r') do |file|
      file.each_with_index do |line, i|
        # TODO: Decide whether to allow user to specify if header row exists.
        # If so, this step will be conditional. Else, add proviso to the README
        # that csv file must include a header line.
        next if i == 0 # skip header row

        # chomp so the last column does not carry the line terminator;
        # limit -1 keeps trailing empty fields that split would otherwise drop.
        values = line.chomp.split(@separator, -1)
        row = {}
        positions.each do |j|
          row[@header[j]] = values[j] if j < values.size
        end
        output << row
      end
    end
    output
  end

  # @return [Integer] the number of columns in the header row
  def count_columns
    fields.size
  end

  # The stripped header names, read once from the first line of the file.
  # An empty file yields an empty list.
  #
  # @return [Array<String>]
  def fields
    return @fields if @fields

    header_line = File.open(@file_path, 'r') { |file| file.gets }
    @fields = header_line.to_s.split(@separator).map(&:strip)
  end
end
|
data/spec/csv_filter_spec.rb
CHANGED
@@ -5,11 +5,44 @@ describe "CsvFilter" do
|
|
5
5
|
@tsv = CsvFilter.new(File.expand_path(File.dirname(__FILE__) + '/sample.tsv'))
|
6
6
|
end
|
7
7
|
|
8
|
-
it "should grab just the columns specified" do
|
9
|
-
|
10
|
-
|
8
|
+
it "#filter should grab just the columns specified" do
|
9
|
+
fields = ['value', 'url']
|
10
|
+
columns = @tsv.filter(fields)
|
11
|
+
columns.first.keys.should eq fields
|
11
12
|
end
|
12
13
|
|
14
|
+
it "should know how many columns exist" do
|
15
|
+
@tsv.count_columns.should be 4
|
16
|
+
end
|
17
|
+
|
18
|
+
# it "#filter should accept either an array of strings or a variable number of strings" do
|
19
|
+
# fields = ['value', 'url']
|
20
|
+
# columns = @tsv.filter(fields)
|
21
|
+
# columns.first.keys.should eq fields
|
22
|
+
|
23
|
+
# columns = @tsv.filter('value', 'url')
|
24
|
+
# columns.first.keys.should eq fields
|
25
|
+
# end
|
26
|
+
|
27
|
+
it "should retrieve the header row" do
|
28
|
+
header = @tsv.grab_header
|
29
|
+
header.should be_an_instance_of(Hash)
|
30
|
+
header.values.should eq @tsv.fields
|
31
|
+
end
|
32
|
+
|
33
|
+
it "should know the position of columns based on name" do
|
34
|
+
@tsv.filtered_column_positions(['value', 'url']).should eq [0, 2]
|
35
|
+
end
|
36
|
+
|
37
|
+
it "should know the columns and their positions" do
|
38
|
+
register = @tsv.register
|
39
|
+
register.should be_an_instance_of Hash
|
40
|
+
register.keys.should eq @tsv.fields
|
41
|
+
register['url'].should eq 2
|
42
|
+
end
|
43
|
+
|
44
|
+
it "should return a warning if all args not in header line" #exit code and puts warning rather than exception
|
45
|
+
|
13
46
|
it "should grep for a string in rows"
|
14
47
|
describe "console output"
|
15
48
|
it "should send output to stndout"
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: csv_filter
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.2
|
4
|
+
version: 0.2.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -131,7 +131,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
131
131
|
version: '0'
|
132
132
|
segments:
|
133
133
|
- 0
|
134
|
-
hash: -
|
134
|
+
hash: -429245302767386299
|
135
135
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
136
136
|
none: false
|
137
137
|
requirements:
|