csv_filter 0.1.2 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/VERSION +1 -1
- data/csv_filter.gemspec +1 -1
- data/lib/csv_filter.rb +52 -2
- data/spec/csv_filter_spec.rb +36 -3
- metadata +2 -2
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.1.2
|
1
|
+
0.2.0
|
data/csv_filter.gemspec
CHANGED
data/lib/csv_filter.rb
CHANGED
@@ -1,8 +1,58 @@
|
|
1
1
|
# @author Kris Luminar
|
2
|
-
# @email kris.luminar@gmail.com
|
3
2
|
class CsvFilter
|
4
3
|
# @param file_path the full path to a file
|
5
|
-
def initialize file_path
|
4
|
+
def initialize file_path, separator = "\t"
|
6
5
|
@file = File.open(file_path, 'r')
|
6
|
+
@separator = separator
|
7
|
+
@num_columns = count_columns
|
8
|
+
@header = {}
|
9
|
+
grab_header
|
7
10
|
end
|
11
|
+
|
12
|
+
def grab_header
|
13
|
+
return @header if (@header and !@header.empty?)
|
14
|
+
@file.rewind
|
15
|
+
fields.each_with_index do |field_name, i|
|
16
|
+
@header[i]= field_name.strip
|
17
|
+
end
|
18
|
+
@header
|
19
|
+
end
|
20
|
+
|
21
|
+
def filtered_column_positions columns
|
22
|
+
columns = columns.flatten
|
23
|
+
@filtered_column_positions ||= register.select {|field, pos| columns.include? field }.values
|
24
|
+
end
|
25
|
+
|
26
|
+
def register
|
27
|
+
grab_header.invert
|
28
|
+
end
|
29
|
+
|
30
|
+
def filter(*columns)
|
31
|
+
# columns = [*columns] #columns should accept either an array of strings or a variable number of strings
|
32
|
+
raise ArgumentError unless (columns.respond_to?(:size) and columns.size < @num_columns)
|
33
|
+
output = []
|
34
|
+
@file.each_with_index do |line, i|
|
35
|
+
#TODO: Decide whether to allow user to specify if header row exists. If so, this step will be conditional. Else, add proviso to the README that csv file must include a header line.
|
36
|
+
next if i == 0 # skip header row
|
37
|
+
row = {}
|
38
|
+
line.split(@separator).each_with_index do |value, j|
|
39
|
+
if filtered_column_positions(columns).include? j
|
40
|
+
row[@header[j]] = value
|
41
|
+
end
|
42
|
+
output << row
|
43
|
+
end
|
44
|
+
end
|
45
|
+
output
|
46
|
+
end
|
47
|
+
|
48
|
+
def count_columns
|
49
|
+
fields.size
|
50
|
+
end
|
51
|
+
|
52
|
+
def fields
|
53
|
+
return @fields if @fields
|
54
|
+
@file.rewind
|
55
|
+
@fields = @file.gets.split(@separator).map &:strip
|
56
|
+
end
|
57
|
+
|
8
58
|
end
|
data/spec/csv_filter_spec.rb
CHANGED
@@ -5,11 +5,44 @@ describe "CsvFilter" do
|
|
5
5
|
@tsv = CsvFilter.new(File.expand_path(File.dirname(__FILE__) + '/sample.tsv'))
|
6
6
|
end
|
7
7
|
|
8
|
-
it "should grab just the columns specified" do
|
9
|
-
|
10
|
-
|
8
|
+
it "#filter should grab just the columns specified" do
|
9
|
+
fields = ['value', 'url']
|
10
|
+
columns = @tsv.filter(fields)
|
11
|
+
columns.first.keys.should eq fields
|
11
12
|
end
|
12
13
|
|
14
|
+
it "should know how many columns exist" do
|
15
|
+
@tsv.count_columns.should be 4
|
16
|
+
end
|
17
|
+
|
18
|
+
# it "#filter should accept either an array of strings or a variable number of strings" do
|
19
|
+
# fields = ['value', 'url']
|
20
|
+
# columns = @tsv.filter(fields)
|
21
|
+
# columns.first.keys.should eq fields
|
22
|
+
|
23
|
+
# columns = @tsv.filter('value', 'url')
|
24
|
+
# columns.first.keys.should eq fields
|
25
|
+
# end
|
26
|
+
|
27
|
+
it "should retrieve the header row" do
|
28
|
+
header = @tsv.grab_header
|
29
|
+
header.should be_an_instance_of(Hash)
|
30
|
+
header.values.should eq @tsv.fields
|
31
|
+
end
|
32
|
+
|
33
|
+
it "should know the position of columns based on name" do
|
34
|
+
@tsv.filtered_column_positions(['value', 'url']).should eq [0, 2]
|
35
|
+
end
|
36
|
+
|
37
|
+
it "should know the columns and their positions" do
|
38
|
+
register = @tsv.register
|
39
|
+
register.should be_an_instance_of Hash
|
40
|
+
register.keys.should eq @tsv.fields
|
41
|
+
register['url'].should eq 2
|
42
|
+
end
|
43
|
+
|
44
|
+
it "should return a warning if all args not in header line" #exit code and puts warning rather than exception
|
45
|
+
|
13
46
|
it "should grep for a string in rows"
|
14
47
|
describe "console output"
|
15
48
|
it "should send output to stndout"
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: csv_filter
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.2
|
4
|
+
version: 0.2.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -131,7 +131,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
131
131
|
version: '0'
|
132
132
|
segments:
|
133
133
|
- 0
|
134
|
-
hash: -
|
134
|
+
hash: -429245302767386299
|
135
135
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
136
136
|
none: false
|
137
137
|
requirements:
|