sycsvpro 0.1.10 → 0.1.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- sycsvpro (0.1.10)
4
+ sycsvpro (0.1.11)
5
5
  gli (= 2.9.0)
6
6
  timeleap (~> 0.0.1)
7
7
 
data/README.md CHANGED
@@ -5,6 +5,8 @@ Processing of csv files. *sycsvpro* offers following functions
5
5
 
6
6
  * analyze a csv file
7
7
  * extract rows and columns from a file
8
+ * remove duplicate lines from a file where duplicates are identified by key
9
+ columns (since version 0.1.11)
8
10
  * collect values of rows and assign them to categories
9
11
  * map column values to new values
10
12
  * allocate column values to a key column (since version 0.0.4)
@@ -76,6 +78,14 @@ Extract row 1,2 and 10-20 as well as columns 4 and 6-7
76
78
 
77
79
  $ sycsvpro -f in.csv -o out.csv extract -r 1,2,10-20 -c 4,6-7
78
80
 
81
+
82
+ Unique
83
+ ------
84
+ Remove duplicate lines from a file. Duplicates are identified by key columns.
85
+ If no key columns are provided, the whole line is checked for uniqueness.
86
+
87
+ $ sycsvpro -f in.csv -o out.csv unique -r 1,2,8-12 -c 4,10-15 -k 0,1
88
+
79
89
  Collect
80
90
  -------
81
91
  Collect all product rows (2, 3 and 4) to the category product
@@ -473,6 +483,11 @@ Version 0.1.10
473
483
  * Fix ~/.syc/sycsvpro system directory creation when no .syc directory is
474
484
  available
475
485
 
486
+ Version 0.1.11
487
+ --------------
488
+ * Unique removes duplicate lines from the infile. Duplicate lines are identified
489
+ by key columns
490
+
476
491
  Installation
477
492
  ============
478
493
  [![Gem Version](https://badge.fury.io/rb/sycsvpro.png)](http://badge.fury.io/rb/sycsvpro)
@@ -0,0 +1,77 @@
1
+ require 'set'
2
+
3
+ # Operating csv files
4
module Sycsvpro

  # Removes copies of rows identified by key values
  #
  #     | Name | Street | Town | Country |
  #     | ---- | ------ | ---- | ------- |
  #     | Jane | Canal  | Win  | CA      |
  #     | Jack | Long   | Van  | CA      |
  #     | Jean | Sing   | Ma   | DE      |
  #     | Jane | Canal  | Win  | CA      |
  #
  # Remove copies based on column 0 (Name)
  #
  #     | Name | Street | Town | Country |
  #     | ---- | ------ | ---- | ------- |
  #     | Jane | Canal  | Win  | CA      |
  #     | Jack | Long   | Van  | CA      |
  #     | Jean | Sing   | Ma   | DE      |
  class Unique

    include Dsl

    # infile contains the data that is operated on
    attr_reader :infile
    # outfile is the file where the result is written to
    attr_reader :outfile
    # filter that is used for rows
    attr_reader :row_filter
    # filter that is used for columns
    attr_reader :col_filter

    # Creates a new Unique
    #
    # Recognized options:
    # infile::  path of the file to read
    # outfile:: path of the file the result is written to
    # rows::    row filter specification handed to RowFilter
    # cols::    column filter specification handed to ColumnFilter
    # key::     column filter specification selecting the key columns
    # df::      forwarded unchanged to all three filters (presumably a date
    #           format - confirm against RowFilter/ColumnFilter)
    #
    # :call-seq:
    #   Sycsvpro::Unique.new(infile:  "infile.csv",
    #                        outfile: "outfile.csv",
    #                        rows:    "1,3-4",
    #                        cols:    "0,2,4-6",
    #                        key:     "0,1").execute
    def initialize(options = {})
      @infile     = options[:infile]
      @outfile    = options[:outfile]
      @row_filter = RowFilter.new(options[:rows], df: options[:df])
      @col_filter = ColumnFilter.new(options[:cols], df: options[:df])
      @key_filter = ColumnFilter.new(options[:key], df: options[:df])
      # Keys of all lines that have already been written to outfile
      @keys       = Set.new
    end

    # Removes the duplicates from infile and writes the result to outfile
    #
    # A line is written only the first time its key is seen. The key is taken
    # from the complete line (after unstring), not from the row/column
    # extraction, so key columns do not have to be part of the output columns.
    #
    # NOTE: the set of seen keys lives in the instance and is not reset here,
    # so calling execute a second time on the same object treats every key of
    # the first run as a duplicate.
    def execute
      File.open(@outfile, 'w') do |out|
        # Block form so the input handle is closed even when a filter raises
        File.open(@infile, 'r') do |input|
          input.each_with_index do |line, index|
            line = line.chomp

            next if line.empty?

            line = unstring(line).chomp

            extraction = col_filter.process(row_filter.process(line, row: index))

            # A falsy extraction means the filters produced nothing to write
            next unless extraction

            key = @key_filter.process(line)

            # Set#add? returns nil when the key is already present
            out.puts extraction if @keys.add?(key)
          end
        end
      end
    end

  end

end
@@ -1,5 +1,5 @@
1
1
  # Operating csv files
2
2
  module Sycsvpro
3
3
  # Version number of sycsvpro
4
- VERSION = '0.1.10'
4
+ VERSION = '0.1.11'
5
5
  end
@@ -0,0 +1,42 @@
1
+ require 'sycsvpro/unique'
2
+
3
module Sycsvpro

  # Specification of Unique: duplicate rows, identified by key columns, are
  # written to the outfile only once
  describe Unique do

    before do
      @infile  = File.join(File.dirname(__FILE__), "files/customer-address.csv")
      @outfile = File.join(File.dirname(__FILE__), "files/out.csv")
    end

    it "should remove copies" do

      rows = "0-10"
      cols = "0,1-3"
      key  = "0,1"

      Sycsvpro::Unique.new(infile:  @infile,
                           outfile: @outfile,
                           rows:    rows,
                           cols:    cols,
                           key:     key).execute

      expected = [ "Name;Street;Town;Country",
                   "Jane;Canal;Vancouver;CA",
                   "John;Milton;Washington;US",
                   "Jne;Canal;Vancouver;CA",
                   "Jhn;Milton;Washington DC;US" ]

      # File.readlines opens, reads and closes the file in one call, so no
      # file handle is left open after the example
      actual = File.readlines(@outfile).map { |line| line.chomp }

      # Comparing the arrays checks every line as well as the number of lines
      actual.should eq expected
    end

  end

end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: sycsvpro
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.10
4
+ version: 0.1.11
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2014-07-05 00:00:00.000000000 Z
12
+ date: 2014-07-09 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: rake
@@ -151,6 +151,7 @@ files:
151
151
  - lib/sycsvpro/script_list.rb
152
152
  - lib/sycsvpro/sorter.rb
153
153
  - lib/sycsvpro/table.rb
154
+ - lib/sycsvpro/unique.rb
154
155
  - lib/sycsvpro/version.rb
155
156
  - spec/sycsvpro/aggregator_spec.rb
156
157
  - spec/sycsvpro/allocator_spec.rb
@@ -174,6 +175,7 @@ files:
174
175
  - spec/sycsvpro/script_list_spec.rb
175
176
  - spec/sycsvpro/sorter_spec.rb
176
177
  - spec/sycsvpro/table_spec.rb
178
+ - spec/sycsvpro/unique_spec.rb
177
179
  - sycsvpro.gemspec
178
180
  - sycsvpro.rdoc
179
181
  homepage: https://github.com/sugaryourcoffee/syc-svpro