sycsvpro 0.1.10 → 0.1.11

Sign up to get free protection for your applications and to get access to all the features.
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- sycsvpro (0.1.10)
4
+ sycsvpro (0.1.11)
5
5
  gli (= 2.9.0)
6
6
  timeleap (~> 0.0.1)
7
7
 
data/README.md CHANGED
@@ -5,6 +5,8 @@ Processing of csv files. *sycsvpro* offers following functions
5
5
 
6
6
  * analyze a csv file
7
7
  * extract rows and columns from a file
8
+ * remove duplicate lines from a file where duplicates are identified by key
9
+ columns (since version 0.1.11)
8
10
  * collect values of rows and assign them to categories
9
11
  * map column values to new values
10
12
  * allocate column values to a key column (since version 0.0.4)
@@ -76,6 +78,14 @@ Extract row 1,2 and 10-20 as well as columns 4 and 6-7
76
78
 
77
79
  $ sycsvpro -f in.csv -o out.csv extract -r 1,2,10-20 -c 4,6-7
78
80
 
81
+
82
+ Unique
83
+ ------
84
+ Remove duplicate lines from a file. Duplicates are identified by key columns.
85
+ If no key columns are provided, the whole line is checked for uniqueness.
86
+
87
+ $ sycsvpro -f in.csv -o out.csv unique -r 1,2,8-12 -c 4,10-15 -k 0,1
88
+
79
89
  Collect
80
90
  -------
81
91
  Collect all product rows (2, 3 and 4) to the category product
@@ -473,6 +483,11 @@ Version 0.1.10
473
483
  * Fix ~/.syc/sycsvpro system directory creation when no .syc directory is
474
484
  available
475
485
 
486
+ Version 0.1.11
487
+ --------------
488
+ * Unique removes duplicate lines from the infile. Duplicate lines are identified
489
+ by key columns
490
+
476
491
  Installation
477
492
  ============
478
493
  [![Gem Version](https://badge.fury.io/rb/sycsvpro.png)](http://badge.fury.io/rb/sycsvpro)
@@ -0,0 +1,77 @@
1
+ require 'set'
2
+
3
+ # Operating csv files
4
module Sycsvpro

  # Removes copies of rows identified by key values
  #
  #     | Name | Street | Town | Country |
  #     | ---- | ------ | ---- | ------- |
  #     | Jane | Canal  | Win  | CA      |
  #     | Jack | Long   | Van  | CA      |
  #     | Jean | Sing   | Ma   | DE      |
  #     | Jane | Canal  | Win  | CA      |
  #
  # Remove copies based on column 0 (Name)
  #
  #     | Name | Street | Town | Country |
  #     | ---- | ------ | ---- | ------- |
  #     | Jane | Canal  | Win  | CA      |
  #     | Jack | Long   | Van  | CA      |
  #     | Jean | Sing   | Ma   | DE      |
  class Unique

    include Dsl

    # infile contains the data that is operated on
    attr_reader :infile
    # outfile is the file where the result is written to
    attr_reader :outfile
    # filter that is used for rows
    attr_reader :row_filter
    # filter that is used for columns
    attr_reader :col_filter

    # Creates a new Unique
    #
    # Recognized options are :infile, :outfile, :rows, :cols, :key and :df.
    # :rows, :cols and :key are handed to the respective filters together
    # with :df (presumably a date format - confirm against RowFilter and
    # ColumnFilter).
    # :call-seq:
    #   Sycsvpro::Unique.new(infile:  "infile.csv",
    #                        outfile: "outfile.csv",
    #                        rows:    "1,3-4",
    #                        cols:    "0,2,4-6",
    #                        key:     "0,1").execute
    def initialize(options = {})
      @infile     = options[:infile]
      @outfile    = options[:outfile]
      @row_filter = RowFilter.new(options[:rows], df: options[:df])
      @col_filter = ColumnFilter.new(options[:cols], df: options[:df])
      @key_filter = ColumnFilter.new(options[:key],  df: options[:df])
      @keys       = Set.new
    end

    # Removes the duplicates from infile and writes the result to outfile.
    #
    # Each non-empty line is passed through the row filter and the column
    # filter. A line that survives the filters is written only when the value
    # its key columns evaluate to has not been seen before, that is the first
    # occurrence of a key wins.
    def execute
      # Start with a clean slate so that calling execute again on the same
      # instance does not treat every line as an already seen duplicate
      @keys.clear

      File.open(@outfile, 'w') do |out|
        # File.foreach closes the input file when the iteration is finished
        File.foreach(@infile).with_index do |line, index|
          line = line.chomp

          next if line.empty?

          line = unstring(line).chomp

          extraction = col_filter.process(row_filter.process(line, row: index))

          # Lines rejected by the filters are neither written nor do they
          # register their key
          next unless extraction

          # Set#add? returns nil when the key is already contained in the set
          out.puts extraction if @keys.add?(@key_filter.process(line))
        end
      end
    end

  end

end
@@ -1,5 +1,5 @@
1
1
  # Operating csv files
2
2
  module Sycsvpro
3
3
  # Version number of sycsvpro
4
- VERSION = '0.1.10'
4
+ VERSION = '0.1.11'
5
5
  end
@@ -0,0 +1,42 @@
1
+ require 'sycsvpro/unique'
2
+
3
module Sycsvpro

  # Specifies that Unique writes only the first row for each key value
  describe Unique do

    before do
      @infile  = File.join(File.dirname(__FILE__), "files/customer-address.csv")
      @outfile = File.join(File.dirname(__FILE__), "files/out.csv")
    end

    it "should remove copies" do

      Sycsvpro::Unique.new(infile:  @infile,
                           outfile: @outfile,
                           rows:    "0-10",
                           cols:    "0,1-3",
                           key:     "0,1").execute

      expected = [ "Name;Street;Town;Country",
                   "Jane;Canal;Vancouver;CA",
                   "John;Milton;Washington;US",
                   "Jne;Canal;Vancouver;CA",
                   "Jhn;Milton;Washington DC;US" ]

      # File.readlines closes the file after reading. Comparing the complete
      # array checks the content of each line as well as the number of lines
      File.readlines(@outfile).map(&:chomp).should eq expected
    end

  end

end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: sycsvpro
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.10
4
+ version: 0.1.11
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2014-07-05 00:00:00.000000000 Z
12
+ date: 2014-07-09 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: rake
@@ -151,6 +151,7 @@ files:
151
151
  - lib/sycsvpro/script_list.rb
152
152
  - lib/sycsvpro/sorter.rb
153
153
  - lib/sycsvpro/table.rb
154
+ - lib/sycsvpro/unique.rb
154
155
  - lib/sycsvpro/version.rb
155
156
  - spec/sycsvpro/aggregator_spec.rb
156
157
  - spec/sycsvpro/allocator_spec.rb
@@ -174,6 +175,7 @@ files:
174
175
  - spec/sycsvpro/script_list_spec.rb
175
176
  - spec/sycsvpro/sorter_spec.rb
176
177
  - spec/sycsvpro/table_spec.rb
178
+ - spec/sycsvpro/unique_spec.rb
177
179
  - sycsvpro.gemspec
178
180
  - sycsvpro.rdoc
179
181
  homepage: https://github.com/sugaryourcoffee/syc-svpro