sycsvpro 0.1.10 → 0.1.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Gemfile.lock +1 -1
- data/README.md +15 -0
- data/lib/sycsvpro/unique.rb +77 -0
- data/lib/sycsvpro/version.rb +1 -1
- data/spec/sycsvpro/unique_spec.rb +42 -0
- metadata +4 -2
data/Gemfile.lock
CHANGED
data/README.md
CHANGED
@@ -5,6 +5,8 @@ Processing of csv files. *sycsvpro* offers following functions
|
|
5
5
|
|
6
6
|
* analyze a csv file
|
7
7
|
* extract rows and columns from a file
|
8
|
+
* remove duplicate lines from a file where duplicates are identified by key
|
9
|
+
columns (since version 0.1.11)
|
8
10
|
* collect values of rows and assign them to categories
|
9
11
|
* map column values to new values
|
10
12
|
* allocate column values to a key column (since version 0.0.4)
|
@@ -76,6 +78,14 @@ Extract row 1,2 and 10-20 as well as columns 4 and 6-7
|
|
76
78
|
|
77
79
|
$ sycsvpro -f in.csv -o out.csv extract -r 1,2,10-20 -c 4,6-7
|
78
80
|
|
81
|
+
|
82
|
+
Unique
|
83
|
+
------
|
84
|
+
Remove duplicate lines from a file. Duplicates are identified by key columns.
|
85
|
+
If no key columns are provided, the whole line is checked for uniqueness.
|
86
|
+
|
87
|
+
$ sycsvpro -f in.csv -o out.csv unique -r 1,2,8-12 -c 4,10-15 -k 0,1
|
88
|
+
|
79
89
|
Collect
|
80
90
|
-------
|
81
91
|
Collect all product rows (2, 3 and 4) to the category product
|
@@ -473,6 +483,11 @@ Version 0.1.10
|
|
473
483
|
* Fix ~/.syc/sycsvpro system directory creation when no .syc directory is
|
474
484
|
available
|
475
485
|
|
486
|
+
Version 0.1.11
|
487
|
+
--------------
|
488
|
+
* Unique removes duplicate lines from the infile. Duplicate lines are identified
|
489
|
+
by key columns
|
490
|
+
|
476
491
|
Installation
|
477
492
|
============
|
478
493
|
[Gem Version](http://badge.fury.io/rb/sycsvpro)
|
@@ -0,0 +1,77 @@
|
|
1
|
+
require 'set'
|
2
|
+
|
3
|
+
# Operating csv files
|
4
|
+
module Sycsvpro
|
5
|
+
|
6
|
+
# Removes copies of rows identified by key values
|
7
|
+
#
|
8
|
+
# | Name | Street | Town | Country |
|
9
|
+
# | ---- | ------ | ---- | ------- |
|
10
|
+
# | Jane | Canal | Win | CA |
|
11
|
+
# | Jack | Long | Van | CA |
|
12
|
+
# | Jean | Sing | Ma | DE |
|
13
|
+
# | Jane | Canal | Win | CA |
|
14
|
+
#
|
15
|
+
# Remove copies based on column 0 (Name)
|
16
|
+
#
|
17
|
+
# | Name | Street | Town | Country |
|
18
|
+
# | ---- | ------ | ---- | ------- |
|
19
|
+
# | Jane | Canal | Win | CA |
|
20
|
+
# | Jack | Long | Van | CA |
|
21
|
+
# | Jean | Sing | Ma | DE |
|
22
|
+
class Unique

  include Dsl

  # infile contains the data that is operated on
  attr_reader :infile
  # outfile is the file where the result is written to
  attr_reader :outfile
  # filter that is used for rows
  attr_reader :row_filter
  # filter that is used for columns
  attr_reader :col_filter

  # Creates a new Unique
  # :call-seq:
  #   Sycsvpro::Unique.new(infile:  "infile.csv",
  #                        outfile: "outfile.csv",
  #                        rows:    "1,3-4",
  #                        cols:    "0,2,4-6",
  #                        key:     "0,1").execute
  #
  # Recognized options:
  # infile::  path of the file to read
  # outfile:: path of the file the unique rows are written to
  # rows::    row filter specification, handed to RowFilter
  # cols::    column filter specification, handed to ColumnFilter
  # key::     columns that identify a duplicate, handed to ColumnFilter
  # df::      forwarded unchanged to all three filters
  #           (presumably a date format - verify against the filter classes)
  def initialize(options = {})
    @infile     = options[:infile]
    @outfile    = options[:outfile]
    @row_filter = RowFilter.new(options[:rows], df: options[:df])
    @col_filter = ColumnFilter.new(options[:cols], df: options[:df])
    @key_filter = ColumnFilter.new(options[:key], df: options[:df])
    @keys       = Set.new
  end

  # Removes the duplicates from infile and writes the result to outfile.
  #
  # Each non-empty line is passed through the row filter and the column
  # filter. A line that survives both filters is written only if its key has
  # not been seen before, so the first occurrence of a key wins.
  def execute
    # Start with an empty key set so that running execute again on the same
    # instance does not treat every row as a duplicate of the previous run
    @keys.clear

    File.open(@outfile, 'w') do |out|
      # Block form makes sure the input file is closed again, even when a
      # filter raises while processing a line
      File.open(@infile, 'r') do |input|
        input.each_with_index do |line, index|
          line = line.chomp

          # index still counts empty lines, so row numbers refer to the
          # physical lines of infile
          next if line.empty?

          line = unstring(line).chomp

          extraction = col_filter.process(row_filter.process(line, row: index))

          # Filters return a falsy value for lines that are filtered out
          next unless extraction

          # The key is taken from the complete line, not from the extraction,
          # so key columns do not have to be part of the selected columns
          key = @key_filter.process(line)

          unless @keys.include? key
            out.puts extraction
            @keys << key
          end
        end
      end
    end
  end

end
|
76
|
+
|
77
|
+
end
|
data/lib/sycsvpro/version.rb
CHANGED
@@ -0,0 +1,42 @@
|
|
1
|
+
require 'sycsvpro/unique'
|
2
|
+
|
3
|
+
module Sycsvpro

  describe Unique do

    # Input fixture and result file live in the files directory next to this
    # spec
    before do
      @infile  = File.join(File.dirname(__FILE__), "files/customer-address.csv")
      @outfile = File.join(File.dirname(__FILE__), "files/out.csv")
    end

    it "should remove copies" do

      Sycsvpro::Unique.new(infile:  @infile,
                           outfile: @outfile,
                           rows:    "0-10",
                           cols:    "0,1-3",
                           key:     "0,1").execute

      expected = [ "Name;Street;Town;Country",
                   "Jane;Canal;Vancouver;CA",
                   "John;Milton;Washington;US",
                   "Jne;Canal;Vancouver;CA",
                   "Jhn;Milton;Washington DC;US" ]

      # File.readlines opens, reads and closes the file in one call, so no
      # file handle is left open after the example has run
      written = File.readlines(@outfile).map(&:chomp)

      # Comparing the complete arrays checks the content of every line as
      # well as the number of lines written
      written.should eq expected
    end

  end

end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: sycsvpro
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.10
|
4
|
+
version: 0.1.11
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2014-07-
|
12
|
+
date: 2014-07-09 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: rake
|
@@ -151,6 +151,7 @@ files:
|
|
151
151
|
- lib/sycsvpro/script_list.rb
|
152
152
|
- lib/sycsvpro/sorter.rb
|
153
153
|
- lib/sycsvpro/table.rb
|
154
|
+
- lib/sycsvpro/unique.rb
|
154
155
|
- lib/sycsvpro/version.rb
|
155
156
|
- spec/sycsvpro/aggregator_spec.rb
|
156
157
|
- spec/sycsvpro/allocator_spec.rb
|
@@ -174,6 +175,7 @@ files:
|
|
174
175
|
- spec/sycsvpro/script_list_spec.rb
|
175
176
|
- spec/sycsvpro/sorter_spec.rb
|
176
177
|
- spec/sycsvpro/table_spec.rb
|
178
|
+
- spec/sycsvpro/unique_spec.rb
|
177
179
|
- sycsvpro.gemspec
|
178
180
|
- sycsvpro.rdoc
|
179
181
|
homepage: https://github.com/sugaryourcoffee/syc-svpro
|