sycsvpro 0.1.10 → 0.1.11
Sign up to get free protection for your applications and to get access to all the features.
- data/Gemfile.lock +1 -1
- data/README.md +15 -0
- data/lib/sycsvpro/unique.rb +77 -0
- data/lib/sycsvpro/version.rb +1 -1
- data/spec/sycsvpro/unique_spec.rb +42 -0
- metadata +4 -2
data/Gemfile.lock
CHANGED
data/README.md
CHANGED
@@ -5,6 +5,8 @@ Processing of csv files. *sycsvpro* offers following functions
|
|
5
5
|
|
6
6
|
* analyze a csv file
|
7
7
|
* extract rows and columns from a file
|
8
|
+
* remove duplicate lines from a file where duplicates are identified by key
|
9
|
+
columns (since version 0.1.11)
|
8
10
|
* collect values of rows and assign them to categories
|
9
11
|
* map column values to new values
|
10
12
|
* allocate column values to a key column (since version 0.0.4)
|
@@ -76,6 +78,14 @@ Extract row 1,2 and 10-20 as well as columns 4 and 6-7
|
|
76
78
|
|
77
79
|
$ sycsvpro -f in.csv -o out.csv extract -r 1,2,10-20 -c 4,6-7
|
78
80
|
|
81
|
+
|
82
|
+
Unique
|
83
|
+
------
|
84
|
+
Remove duplicate lines from a file. Duplicates are identified by key columns.
|
85
|
+
If no key columns are provided the whole line is checked for uniqueness
|
86
|
+
|
87
|
+
$ sycsvpro -f in.csv -o out.csv unique -r 1,2,8-12 -c 4,10-15 -k 0,1
|
88
|
+
|
79
89
|
Collect
|
80
90
|
-------
|
81
91
|
Collect all product rows (2, 3 and 4) to the category product
|
@@ -473,6 +483,11 @@ Version 0.1.10
|
|
473
483
|
* Fix ~/.syc/sycsvpro system directory creation when no .syc directory is
|
474
484
|
available
|
475
485
|
|
486
|
+
Version 0.1.11
|
487
|
+
--------------
|
488
|
+
* Unique removes duplicate lines from the infile. Duplicate lines are identified
|
489
|
+
by key columns
|
490
|
+
|
476
491
|
Installation
|
477
492
|
============
|
478
493
|
[![Gem Version](https://badge.fury.io/rb/sycsvpro.png)](http://badge.fury.io/rb/sycsvpro)
|
data/lib/sycsvpro/unique.rb
ADDED
@@ -0,0 +1,77 @@
|
|
1
|
+
require 'set'
|
2
|
+
|
3
|
+
# Operating csv files
|
4
|
+
module Sycsvpro
|
5
|
+
|
6
|
+
# Removes copies of rows identified by key values
|
7
|
+
#
|
8
|
+
# | Name | Street | Town | Country |
|
9
|
+
# | ---- | ------ | ---- | ------- |
|
10
|
+
# | Jane | Canal | Win | CA |
|
11
|
+
# | Jack | Long | Van | CA |
|
12
|
+
# | Jean | Sing | Ma | DE |
|
13
|
+
# | Jane | Canal | Win | CA |
|
14
|
+
#
|
15
|
+
# Remove copies based on column 0 (Name)
|
16
|
+
#
|
17
|
+
# | Name | Street | Town | Country |
|
18
|
+
# | ---- | ------ | ---- | ------- |
|
19
|
+
# | Jane | Canal | Win | CA |
|
20
|
+
# | Jack | Long | Van | CA |
|
21
|
+
# | Jean | Sing | Ma | DE |
|
22
|
+
class Unique

  include Dsl

  # infile contains the data that is operated on
  attr_reader :infile
  # outfile is the file where the result is written to
  attr_reader :outfile
  # filter that is used for rows
  attr_reader :row_filter
  # filter that is used for columns
  attr_reader :col_filter

  # Creates a new Unique
  #
  # options is a hash read with the keys :infile, :outfile, :rows, :cols,
  # :key and :df. :rows, :cols and :key are handed to RowFilter and
  # ColumnFilter respectively, each together with :df.
  # :call-seq:
  #   Sycsvpro::Unique.new(infile:  "infile.csv",
  #                        outfile: "outfile.csv",
  #                        rows:    "1,3-4",
  #                        cols:    "0,2,4-6",
  #                        key:     "0,1").execute
  def initialize(options = {})
    @infile     = options[:infile]
    @outfile    = options[:outfile]
    @row_filter = RowFilter.new(options[:rows], df: options[:df])
    @col_filter = ColumnFilter.new(options[:cols], df: options[:df])
    @key_filter = ColumnFilter.new(options[:key], df: options[:df])
    # Keys of all lines written so far. It is only created here, so keys
    # seen in one call to #execute are still known in a later call on the
    # same instance.
    @keys       = Set.new
  end

  # Removes the duplicates from infile and writes the result to outfile
  #
  # Each non-empty line is passed through the row and column filters; the
  # filtered line is written only if the key extracted from the unfiltered
  # line has not been seen before. Both files are opened in block form so
  # that the handles are closed even when processing raises.
  def execute
    File.open(@outfile, 'w') do |out|
      File.open(@infile, 'r') do |input|
        input.each_with_index do |line, index|
          line = line.chomp

          # Empty lines are skipped but still count towards the row index
          next if line.empty?

          line = unstring(line).chomp

          extraction = col_filter.process(row_filter.process(line, row: index))

          # NOTE(review): presumably the filters return nil for lines that
          # are not selected - confirm against RowFilter/ColumnFilter
          next unless extraction

          key = @key_filter.process(line)

          # Set#add? returns nil when the key is already present, so only
          # the first line carrying a given key is written
          out.puts extraction if @keys.add?(key)
        end
      end
    end
  end

end
|
76
|
+
|
77
|
+
end
|
data/lib/sycsvpro/version.rb
CHANGED
data/spec/sycsvpro/unique_spec.rb
ADDED
@@ -0,0 +1,42 @@
|
|
1
|
+
require 'sycsvpro/unique'
|
2
|
+
|
3
|
+
module Sycsvpro
|
4
|
+
|
5
|
+
describe Unique do

  before do
    @infile = File.join(File.dirname(__FILE__), "files/customer-address.csv")
    @outfile = File.join(File.dirname(__FILE__), "files/out.csv")
  end

  # Runs Unique on the customer address fixture with key columns 0 and 1
  # and compares the complete outfile with the expected lines.
  it "should remove copies" do

    rows = "0-10"
    cols = "0,1-3"
    key  = "0,1"

    Sycsvpro::Unique.new(infile:  @infile,
                         outfile: @outfile,
                         rows:    rows,
                         cols:    cols,
                         key:     key).execute

    expected = [ "Name;Street;Town;Country",
                 "Jane;Canal;Vancouver;CA",
                 "John;Milton;Washington;US",
                 "Jne;Canal;Vancouver;CA",
                 "Jhn;Milton;Washington DC;US" ]

    # File.readlines closes the file itself; comparing whole arrays checks
    # content and number of lines in a single expectation
    File.readlines(@outfile).map(&:chomp).should eq expected
  end

end
|
41
|
+
|
42
|
+
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: sycsvpro
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.10
|
4
|
+
version: 0.1.11
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2014-07-
|
12
|
+
date: 2014-07-09 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: rake
|
@@ -151,6 +151,7 @@ files:
|
|
151
151
|
- lib/sycsvpro/script_list.rb
|
152
152
|
- lib/sycsvpro/sorter.rb
|
153
153
|
- lib/sycsvpro/table.rb
|
154
|
+
- lib/sycsvpro/unique.rb
|
154
155
|
- lib/sycsvpro/version.rb
|
155
156
|
- spec/sycsvpro/aggregator_spec.rb
|
156
157
|
- spec/sycsvpro/allocator_spec.rb
|
@@ -174,6 +175,7 @@ files:
|
|
174
175
|
- spec/sycsvpro/script_list_spec.rb
|
175
176
|
- spec/sycsvpro/sorter_spec.rb
|
176
177
|
- spec/sycsvpro/table_spec.rb
|
178
|
+
- spec/sycsvpro/unique_spec.rb
|
177
179
|
- sycsvpro.gemspec
|
178
180
|
- sycsvpro.rdoc
|
179
181
|
homepage: https://github.com/sugaryourcoffee/syc-svpro
|