csv_filter 0.1.2 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.1.2
1
+ 0.2.0
data/csv_filter.gemspec CHANGED
@@ -5,7 +5,7 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = "csv_filter"
8
- s.version = "0.1.2"
8
+ s.version = "0.2.0"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Kris Luminar"]
data/lib/csv_filter.rb CHANGED
@@ -1,8 +1,58 @@
1
1
  # @author Kris Luminar
2
- # @email kris.luminar@gmail.com
3
2
  class CsvFilter
4
3
  # @param file_path the full path to a file
5
- def initialize file_path
4
+ def initialize file_path, separator = "\t"
6
5
  @file = File.open(file_path, 'r')
6
+ @separator = separator
7
+ @num_columns = count_columns
8
+ @header = {}
9
+ grab_header
7
10
  end
11
+
12
+ def grab_header
13
+ return @header if (@header and !@header.empty?)
14
+ @file.rewind
15
+ fields.each_with_index do |field_name, i|
16
+ @header[i]= field_name.strip
17
+ end
18
+ @header
19
+ end
20
+
21
+ def filtered_column_positions columns
22
+ columns = columns.flatten
23
+ @filtered_column_positions ||= register.select {|field, pos| columns.include? field }.values
24
+ end
25
+
26
+ def register
27
+ grab_header.invert
28
+ end
29
+
30
+ def filter(*columns)
31
+ # columns = [*columns] #columns should accept either an array of strings or a variable number of strings
32
+ raise ArgumentError unless (columns.respond_to?(:size) and columns.size < @num_columns)
33
+ output = []
34
+ @file.each_with_index do |line, i|
35
+ #TODO: Decide whether to allow user to specify if header row exists. If so, this step will be conditional. Else, add proviso to the README that csv file must include a header line.
36
+ next if i == 0 # skip header row
37
+ row = {}
38
+ line.split(@separator).each_with_index do |value, j|
39
+ if filtered_column_positions(columns).include? j
40
+ row[@header[j]] = value
41
+ end
42
+ output << row
43
+ end
44
+ end
45
+ output
46
+ end
47
+
48
+ def count_columns
49
+ fields.size
50
+ end
51
+
52
+ def fields
53
+ return @fields if @fields
54
+ @file.rewind
55
+ @fields = @file.gets.split(@separator).map &:strip
56
+ end
57
+
8
58
  end
@@ -5,11 +5,44 @@ describe "CsvFilter" do
5
5
  @tsv = CsvFilter.new(File.expand_path(File.dirname(__FILE__) + '/sample.tsv'))
6
6
  end
7
7
 
8
- it "should grab just the columns specified" do
9
- columns = @tsv.filter('value', 'url')
10
- (columns.gets =~ 'value').should be_true
8
+ it "#filter should grab just the columns specified" do
9
+ fields = ['value', 'url']
10
+ columns = @tsv.filter(fields)
11
+ columns.first.keys.should eq fields
11
12
  end
12
13
 
14
+ it "should know how many columns exist" do
15
+ @tsv.count_columns.should be 4
16
+ end
17
+
18
+ # it "#filter should accept either an array of strings or a variable number of strings" do
19
+ # fields = ['value', 'url']
20
+ # columns = @tsv.filter(fields)
21
+ # columns.first.keys.should eq fields
22
+
23
+ # columns = @tsv.filter('value', 'url')
24
+ # columns.first.keys.should eq fields
25
+ # end
26
+
27
+ it "should retrieve the header row" do
28
+ header = @tsv.grab_header
29
+ header.should be_an_instance_of(Hash)
30
+ header.values.should eq @tsv.fields
31
+ end
32
+
33
+ it "should know the position of columns based on name" do
34
+ @tsv.filtered_column_positions(['value', 'url']).should eq [0, 2]
35
+ end
36
+
37
+ it "should know the columns and their positions" do
38
+ register = @tsv.register
39
+ register.should be_an_instance_of Hash
40
+ register.keys.should eq @tsv.fields
41
+ register['url'].should eq 2
42
+ end
43
+
44
+ it "should return a warning if all args not in header line" #exit code and puts warning rather than exception
45
+
13
46
  it "should grep for a string in rows"
14
47
  describe "console output"
15
48
  it "should send output to stndout"
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: csv_filter
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.2
4
+ version: 0.2.0
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -131,7 +131,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
131
131
  version: '0'
132
132
  segments:
133
133
  - 0
134
- hash: -776162061989875452
134
+ hash: -429245302767386299
135
135
  required_rubygems_version: !ruby/object:Gem::Requirement
136
136
  none: false
137
137
  requirements: