csvutils 0.2.1 → 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +258 -0
- data/lib/csvutils/commands/cut.rb +6 -6
- data/lib/csvutils/cut.rb +4 -1
- data/lib/csvutils/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: c639c6a13e149e81b5255262884abd02fae3c22e
|
4
|
+
data.tar.gz: 73bec69652d262ebf616c00e8351f8788b0d022a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 45d8ba36d58577f3362cea9ead2ff9b952992f5038cb453dad6d666948a498269b60ef2c7d15b1c260b291a69c047ab3616ba62d93c92ac6af61cd5e8ae703fc
|
7
|
+
data.tar.gz: 56ebd4f88a54411e97312a832de6d2d830e1af15e287923535dd51edd4ac11c09ff525a7a27363d9c4255defbae7dc6040f6bb2202439fc06d4bf15adfe01b54
|
data/README.md
CHANGED
@@ -8,10 +8,268 @@
|
|
8
8
|
* forum :: [wwwmake](http://groups.google.com/group/wwwmake)
|
9
9
|
|
10
10
|
|
11
|
+
|
11
12
|
## Usage
|
12
13
|
|
14
|
+
### Command Line Tools
|
15
|
+
|
16
|
+
`csvhead` • `csvheader` • `csvstat` • `csvsplit` • `csvcut`
|
17
|
+
|
18
|
+
|
19
|
+
Try:
|
20
|
+
|
21
|
+
```
|
22
|
+
$ csvhead -h # or
|
23
|
+
$ csvhead --help
|
24
|
+
```
|
25
|
+
|
26
|
+
resulting in:
|
27
|
+
|
28
|
+
```
|
29
|
+
Usage: csvhead [OPTS] datafile ...
|
30
|
+
-n, --num=NUM Number of rows
|
31
|
+
-h, --help Prints this help
|
32
|
+
```
|
33
|
+
|
34
|
+
and so on. Now try with `csvheader -h`, `csvstat -h`, `csvsplit -h`,
|
35
|
+
`csvcut -h` and so on.
|
36
|
+
|
37
|
+
|
38
|
+
|
39
|
+
#### Working with Comma-Separated Values (CSV) Datafile Examples
|
40
|
+
|
41
|
+
Let's use a sample datafile e.g. `E0.csv` from the [football.csv project](https://github.com/footballcsv) with
|
42
|
+
matches from the English Premier League. Try
|
43
|
+
|
44
|
+
```
|
45
|
+
$ csvhead E0.csv
|
46
|
+
```
|
47
|
+
to pretty print (pp) the first four rows (use the `-n` option for more or fewer rows).
|
48
|
+
Resulting in:
|
49
|
+
|
50
|
+
```
|
51
|
+
== E0.csv (.) ==
|
52
|
+
|
53
|
+
#<CSV::Row "Date":nil "HomeTeam":"Arsenal" "AwayTeam":"Leicester" "FTHG":"4" "FTAG":"3" "HTHG":"2" "HTAG":"2">
|
54
|
+
#<CSV::Row "Date":nil "HomeTeam":"Brighton" "AwayTeam":"Man City" "FTHG":"0" "FTAG":"2" "HTHG":"?" "HTAG":"?">
|
55
|
+
#<CSV::Row "Date":"12/08/17" "HomeTeam":"Chelsea" "AwayTeam":"Burnley" "FTHG":"2" "FTAG":"3" "HTHG":"?" "HTAG":"?">
|
56
|
+
#<CSV::Row "Date":"-" "HomeTeam":"Crystal Palace" "AwayTeam":"Huddersfield" "FTHG":"0" "FTAG":"3" "HTHG":"0" "HTAG":"2">
|
57
|
+
|
58
|
+
4 rows
|
59
|
+
```
|
60
|
+
|
61
|
+
Next try
|
62
|
+
|
63
|
+
```
|
64
|
+
$ csvheader E0.csv
|
65
|
+
```
|
66
|
+
|
67
|
+
to print all header columns (the first row). Resulting in:
|
68
|
+
|
69
|
+
```
|
70
|
+
== E0.csv (.) ==
|
71
|
+
|
72
|
+
7 columns:
|
73
|
+
1: Date
|
74
|
+
2: HomeTeam
|
75
|
+
3: AwayTeam
|
76
|
+
4: FTHG
|
77
|
+
5: FTAG
|
78
|
+
6: HTHG
|
79
|
+
7: HTAG
|
80
|
+
```
|
81
|
+
|
82
|
+
Next try:
|
83
|
+
|
84
|
+
```
|
85
|
+
$ csvstat -c HomeTeam,AwayTeam E0.csv
|
86
|
+
```
|
87
|
+
|
88
|
+
to show all unique values for the columns `HomeTeam` and `AwayTeam`.
|
89
|
+
Resulting in:
|
90
|
+
|
91
|
+
```
|
92
|
+
== E0.csv (.) ==
|
93
|
+
|
94
|
+
... 380 rows
|
95
|
+
|
96
|
+
7 columns:
|
97
|
+
1: Date
|
98
|
+
2: HomeTeam
|
99
|
+
3: AwayTeam
|
100
|
+
4: FTHG
|
101
|
+
5: FTAG
|
102
|
+
6: HTHG
|
103
|
+
7: HTAG
|
104
|
+
|
105
|
+
column >HomeTeam< 21 unique values:
|
106
|
+
1 x <nil>
|
107
|
+
19 x Arsenal
|
108
|
+
18 x Bournemouth
|
109
|
+
19 x Brighton
|
110
|
+
19 x Burnley
|
111
|
+
19 x Chelsea
|
112
|
+
19 x Crystal Palace
|
113
|
+
19 x Everton
|
114
|
+
19 x Huddersfield
|
115
|
+
19 x Leicester
|
116
|
+
19 x Liverpool
|
117
|
+
19 x Man City
|
118
|
+
19 x Man United
|
119
|
+
19 x Newcastle
|
120
|
+
19 x Southampton
|
121
|
+
19 x Stoke
|
122
|
+
19 x Swansea
|
123
|
+
19 x Tottenham
|
124
|
+
19 x Watford
|
125
|
+
19 x West Brom
|
126
|
+
19 x West Ham
|
127
|
+
column >AwayTeam< 21 unique values:
|
128
|
+
1 x ?
|
129
|
+
19 x Arsenal
|
130
|
+
19 x Bournemouth
|
131
|
+
19 x Brighton
|
132
|
+
19 x Burnley
|
133
|
+
19 x Chelsea
|
134
|
+
19 x Crystal Palace
|
135
|
+
19 x Everton
|
136
|
+
19 x Huddersfield
|
137
|
+
19 x Leicester
|
138
|
+
19 x Liverpool
|
139
|
+
19 x Man City
|
140
|
+
19 x Man United
|
141
|
+
19 x Newcastle
|
142
|
+
19 x Southampton
|
143
|
+
19 x Stoke
|
144
|
+
19 x Swansea
|
145
|
+
19 x Tottenham
|
146
|
+
18 x Watford
|
147
|
+
19 x West Brom
|
148
|
+
19 x West Ham
|
149
|
+
```
|
150
|
+
|
151
|
+
|
152
|
+
#### Split & Cut - Split One Datafile into Many or Cut / Reorder Columns
|
153
|
+
|
154
|
+
Let's use another sample datafile e.g. `AUT.csv` that holds many seasons
|
155
|
+
from the Austrian (AUT) Bundesliga. First let's see how many seasons:
|
156
|
+
|
157
|
+
```
|
158
|
+
$ csvstat -c Season AUT.csv
|
159
|
+
```
|
160
|
+
|
161
|
+
Resulting in:
|
162
|
+
|
163
|
+
```
|
164
|
+
== AUT.csv (.) ==
|
165
|
+
|
166
|
+
... 362 rows
|
167
|
+
|
168
|
+
7 columns:
|
169
|
+
1: Season
|
170
|
+
2: Date
|
171
|
+
3: Time
|
172
|
+
4: Home
|
173
|
+
5: Away
|
174
|
+
6: HG
|
175
|
+
7: AG
|
176
|
+
|
177
|
+
column >Season< 2 unique values:
|
178
|
+
180 x 2016/2017
|
179
|
+
182 x 2017/2018
|
180
|
+
```
|
181
|
+
|
182
|
+
Now let's split the `AUT.csv` datafile by the `Season` column
|
183
|
+
resulting in two new datafiles named `AUT_2016-2017.csv`
|
184
|
+
and `AUT_2017-2018.csv`. Try:
|
185
|
+
|
186
|
+
```
|
187
|
+
$ csvsplit -c Season AUT.csv
|
188
|
+
```
|
189
|
+
|
190
|
+
Resulting in:
|
191
|
+
|
192
|
+
```
|
193
|
+
new chunk: ["2016/2017"]
|
194
|
+
saving >AUT_2016-2017.csv<...
|
195
|
+
new chunk: ["2017/2018"]
|
196
|
+
saving >AUT_2017-2018.csv<...
|
197
|
+
```
|
198
|
+
|
199
|
+
Let's cut out (remove) the `Season` and `Time` columns from the new `AUT_2016-2017.csv`
|
200
|
+
datafile. Try:
|
201
|
+
|
202
|
+
```
|
203
|
+
$ csvcut -c Date,Home,Away,HG,AG AUT_2016-2017.csv
|
204
|
+
```
|
205
|
+
|
206
|
+
Double check the overwritten cleaned-up datafile:
|
207
|
+
|
208
|
+
```
|
209
|
+
$ csvhead AUT_2016-2017.csv
|
210
|
+
```
|
211
|
+
|
212
|
+
resulting in:
|
213
|
+
|
214
|
+
```
|
215
|
+
|
216
|
+
```
|
217
|
+
|
218
|
+
And so on and so forth.
|
219
|
+
|
220
|
+
|
221
|
+
|
222
|
+
|
223
|
+
### Code, Code, Code - Script Your Data Work Flow with Ruby
|
224
|
+
|
225
|
+
You can use all tools in your script using the `CsvUtils`
|
226
|
+
class methods:
|
227
|
+
|
228
|
+
| Shell | Ruby |
|
229
|
+
|-------------|-----------------------------------|
|
230
|
+
| `csvhead` | `CsvUtils.head( path, n: 4 )` |
|
231
|
+
| `csvheader` | `CsvUtils.header( path )` |
|
232
|
+
| `csvstat` | `CsvUtils.stat( path, *columns )` |
|
233
|
+
| `csvsplit` | `CsvUtils.split( path, *columns )` |
|
234
|
+
| `csvcut` | `CsvUtils.cut( path, *columns, output: path)` |
|
235
|
+
|
236
|
+
|
237
|
+
|
238
|
+
Let's retry the sample above in a script:
|
239
|
+
|
240
|
+
|
241
|
+
``` ruby
|
242
|
+
require 'csvutils'
|
243
|
+
|
244
|
+
|
245
|
+
CsvUtils.head( 'E0.csv' )
|
246
|
+
# same as
|
247
|
+
# $ csvhead E0.csv
|
248
|
+
|
249
|
+
CsvUtils.header( 'E0.csv' )
|
250
|
+
# => see above :-)
|
251
|
+
|
252
|
+
CsvUtils.stat( 'E0.csv', 'HomeTeam', 'AwayTeam' )
|
253
|
+
# same as:
|
254
|
+
# $ csvstat -c HomeTeam,AwayTeam E0.csv
|
255
|
+
|
256
|
+
|
257
|
+
CsvUtils.stat( 'AUT.csv', 'Season' )
|
258
|
+
# => same as
|
259
|
+
# $ csvstat -c Season AUT.csv
|
260
|
+
|
261
|
+
|
262
|
+
CsvUtils.split( 'AUT.csv', 'Season' )
|
263
|
+
# => see above :-)
|
264
|
+
|
265
|
+
CsvUtils.cut( 'AUT_2016-2017.csv', 'Date', 'Home', 'Away', 'HG', 'AG' )
|
266
|
+
# => see above :-)
|
267
|
+
|
268
|
+
|
269
|
+
```
|
13
270
|
|
14
271
|
|
272
|
+
That's it.
|
15
273
|
|
16
274
|
|
17
275
|
## License
|
@@ -9,7 +9,7 @@ def self.cut( args )
|
|
9
9
|
config = { columns: [] }
|
10
10
|
|
11
11
|
parser = OptionParser.new do |opts|
|
12
|
-
opts.banner = "Usage: csvcut [OPTS] source dest"
|
12
|
+
opts.banner = "Usage: csvcut [OPTS] source [dest]"
|
13
13
|
|
14
14
|
opts.on("-c", "--columns=COLUMNS", "Name of header columns" ) do |columns|
|
15
15
|
config[:columns] = columns.split(/[,|;]/) ## allow differnt separators
|
@@ -26,17 +26,17 @@ def self.cut( args )
|
|
26
26
|
## pp config
|
27
27
|
## pp args
|
28
28
|
|
29
|
-
source =
|
30
|
-
dest =
|
29
|
+
source = args[0]
|
30
|
+
dest = args[1] || source ## default to same as source (note: overwrites datafile in place!!!)
|
31
31
|
|
32
|
-
unless
|
33
|
-
puts "** error: arg
|
32
|
+
unless args[0]
|
33
|
+
puts "** error: arg missing - source filepath required - #{args.inspect}"
|
34
34
|
exit 1
|
35
35
|
end
|
36
36
|
|
37
37
|
columns = config[:columns]
|
38
38
|
|
39
|
-
CsvUtils.cut( source,
|
39
|
+
CsvUtils.cut( source, *columns, output: dest )
|
40
40
|
end
|
41
41
|
|
42
42
|
|
data/lib/csvutils/cut.rb
CHANGED
@@ -5,7 +5,10 @@
|
|
5
5
|
|
6
6
|
class CsvUtils
|
7
7
|
|
8
|
-
def self.cut(
|
8
|
+
def self.cut( path, *columns, output: path, sep: ',' )
|
9
|
+
|
10
|
+
inpath = path
|
11
|
+
outpath = output # note: output defaults to inpath (overwrites datafile in-place!!!)
|
9
12
|
|
10
13
|
puts "cvscut in: >#{inpath}< out: >#{outpath}<"
|
11
14
|
|
data/lib/csvutils/version.rb
CHANGED