mdarray-jcsv 0.6.3-java
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/LICENSE.txt +23 -0
- data/README.md +2 -0
- data/Rakefile +46 -0
- data/config.rb +104 -0
- data/lib/constraints.rb +205 -0
- data/lib/date_filters.rb +252 -0
- data/lib/dimensions.rb +276 -0
- data/lib/filters.rb +332 -0
- data/lib/jcsv.rb +107 -0
- data/lib/list_reader.rb +200 -0
- data/lib/locale.rb +192 -0
- data/lib/map_reader.rb +192 -0
- data/lib/mdarray-jcsv.rb +24 -0
- data/lib/mdarray_reader.rb +110 -0
- data/lib/numeric_filters.rb +225 -0
- data/lib/reader.rb +547 -0
- data/lib/supercsv_interface.rb +231 -0
- data/test/test_complete.rb +37 -0
- data/test/test_critbit.rb +442 -0
- data/test/test_customer_list.rb +436 -0
- data/test/test_customer_map.rb +209 -0
- data/test/test_customer_nhlist.rb +161 -0
- data/test/test_deep_map.rb +264 -0
- data/test/test_del.rb +73 -0
- data/test/test_dimensions.rb +231 -0
- data/test/test_example.rb +79 -0
- data/test/test_filters.rb +374 -0
- data/test/test_list_dimensions.rb +110 -0
- data/test/test_mdarray.rb +227 -0
- data/test/test_missing_data.rb +57 -0
- data/vendor/commons-beanutils-1.8.3.jar +0 -0
- data/vendor/commons-lang3-3.1.jar +0 -0
- data/vendor/dozer-5.4.0.jar +0 -0
- data/vendor/jcl-over-slf4j-1.6.6.jar +0 -0
- data/vendor/joda-time-2.7.jar +0 -0
- data/vendor/slf4j-api-1.7.5.jar +0 -0
- data/vendor/snakeyaml-1.14.jar +0 -0
- data/vendor/super-csv-2.4.0.jar +0 -0
- data/vendor/super-csv-dozer-2.4.0.jar +0 -0
- data/vendor/super-csv-java8-2.4.0.jar +0 -0
- data/vendor/super-csv-joda-2.4.0.jar +0 -0
- data/version.rb +2 -0
- metadata +196 -0
@@ -0,0 +1,436 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
|
3
|
+
##########################################################################################
|
4
|
+
# Copyright © 2015 Rodrigo Botafogo. All Rights Reserved. Permission to use, copy, modify,
|
5
|
+
# and distribute this software and its documentation for educational, research, and
|
6
|
+
# not-for-profit purposes, without fee and without a signed licensing agreement, is hereby
|
7
|
+
# granted, provided that the above copyright notice, this paragraph and the following two
|
8
|
+
# paragraphs appear in all copies, modifications, and distributions. Contact Rodrigo
|
9
|
+
# Botafogo - rodrigo.a.botafogo@gmail.com for commercial licensing opportunities.
|
10
|
+
#
|
11
|
+
# IN NO EVENT SHALL RODRIGO BOTAFOGO BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL,
|
12
|
+
# INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF
|
13
|
+
# THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF RODRIGO BOTAFOGO HAS BEEN ADVISED OF THE
|
14
|
+
# POSSIBILITY OF SUCH DAMAGE.
|
15
|
+
#
|
16
|
+
# RODRIGO BOTAFOGO SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
|
17
|
+
# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE
|
18
|
+
# SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED HEREUNDER IS PROVIDED "AS IS".
|
19
|
+
# RODRIGO BOTAFOGO HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS,
|
20
|
+
# OR MODIFICATIONS.
|
21
|
+
##########################################################################################
|
22
|
+
|
23
|
+
require 'rubygems'
|
24
|
+
require 'test/unit'
|
25
|
+
require 'shoulda'
|
26
|
+
|
27
|
+
require_relative '../config'
|
28
|
+
require 'jcsv'
|
29
|
+
|
30
|
+
class CSVTest < Test::Unit::TestCase
|
31
|
+
|
32
|
+
context "CSV test" do
|
33
|
+
|
34
|
+
setup do
|
35
|
+
|
36
|
+
end
|
37
|
+
|
38
|
+
#=begin
|
39
|
+
#-------------------------------------------------------------------------------------
|
40
|
+
#
|
41
|
+
#-------------------------------------------------------------------------------------
|
42
|
+
|
43
|
+
should "parse a csv file the quick way with headers" do
|
44
|
+
|
45
|
+
# Reads all rows into memory and returns an array of arrays. Each line is stored in
|
46
|
+
# one array. Data is stored in the 'rows' instance variable.
|
47
|
+
# Create the reader with all default parameters. Headers are converted from string
|
48
|
+
# to symbol
|
49
|
+
reader = Jcsv.reader("../data/customer.csv")
|
50
|
+
|
51
|
+
# now read the whole csv file
|
52
|
+
content = reader.read
|
53
|
+
|
54
|
+
# Headers are converted to symbol
|
55
|
+
assert_equal([:customer_no, :first_name, :last_name, :birth_date, :mailing_address,
|
56
|
+
:married, :number_of_kids, :favourite_quote, :email, :loyalty_points],
|
57
|
+
reader.headers)
|
58
|
+
|
59
|
+
assert_equal(["1", "John", "Dunbar", "13/06/1945",
|
60
|
+
"1600 Amphitheatre Parkway\nMountain View, CA 94043\nUnited States",
|
61
|
+
nil, nil, "\"May the Force be with you.\" - Star Wars",
|
62
|
+
"jdunbar@gmail.com", "0"], content[0])
|
63
|
+
|
64
|
+
end
|
65
|
+
|
66
|
+
#-------------------------------------------------------------------------------------
|
67
|
+
#
|
68
|
+
#-------------------------------------------------------------------------------------
|
69
|
+
|
70
|
+
should "leave headers as string" do
|
71
|
+
|
72
|
+
# Reads all rows into memory and returns an array of arrays. Each line is stored in
|
73
|
+
# one array. Data is stored in the 'rows' instance variable.
|
74
|
+
# Headers are kept as strings instead of symbol
|
75
|
+
reader = Jcsv.reader("../data/customer.csv", strings_as_keys: true)
|
76
|
+
|
77
|
+
# now read the whole csv file
|
78
|
+
content = reader.read
|
79
|
+
|
80
|
+
assert_equal(["customerNo", "firstName", "lastName", "birthDate", "mailingAddress",
|
81
|
+
"married", "numberOfKids", "favouriteQuote", "email", "loyaltyPoints"],
|
82
|
+
reader.headers)
|
83
|
+
|
84
|
+
end
|
85
|
+
|
86
|
+
#-------------------------------------------------------------------------------------
|
87
|
+
#
|
88
|
+
#-------------------------------------------------------------------------------------
|
89
|
+
|
90
|
+
should "parse a csv file passing a block" do
|
91
|
+
|
92
|
+
# read lines and pass them to a block for processing. The block receives the
|
93
|
+
# line_no (last line of the record), row_no, row and the headers. If 'headers' is
|
94
|
+
# false, then headers will be nil. Instead of
|
95
|
+
# method foreach, one could also use method 'read' with a block. 'read' and
|
96
|
+
# 'foreach' are identical.
|
97
|
+
reader = Jcsv.reader("../data/customer.csv", headers: true, strings_as_keys: true)
|
98
|
+
|
99
|
+
reader.read do |line_no, row_no, row, headers|
|
100
|
+
assert_equal(4, line_no) if row_no == 2
|
101
|
+
assert_equal(7, line_no) if row_no == 3
|
102
|
+
assert_equal(10, line_no) if row_no == 4
|
103
|
+
assert_equal(13, line_no) if row_no == 5
|
104
|
+
|
105
|
+
assert_equal(["customerNo", "firstName", "lastName", "birthDate",
|
106
|
+
"mailingAddress", "married", "numberOfKids", "favouriteQuote",
|
107
|
+
"email", "loyaltyPoints"], headers)
|
108
|
+
|
109
|
+
# Since the file has a header, the third record is row_no = 4
|
110
|
+
assert_equal(["3", "Alice", "Wunderland",
|
111
|
+
"08/08/1985", "One Microsoft Way\nRedmond, WA 98052-6399\nUnited States",
|
112
|
+
"Y", "0", "\"Play it, Sam. Play \"As Time Goes By.\"\" - Casablanca",
|
113
|
+
"throughthelookingglass@yahoo.com", "2255887799"], row) if row_no == 4
|
114
|
+
end
|
115
|
+
|
116
|
+
end
|
117
|
+
|
118
|
+
#-------------------------------------------------------------------------------------
|
119
|
+
#
|
120
|
+
#-------------------------------------------------------------------------------------
|
121
|
+
|
122
|
+
should "parse a csv file with filters" do

  # Filters convert the raw strings of a column into typed values. The default filter
  # is not_nil, so every column is required unless its own filter chain starts with
  # Jcsv.optional.
  parser = Jcsv.reader("../data/customer.csv", default_filter: Jcsv.not_nil)

  # number_of_kids and married may be blank; when present they are converted to an
  # int and to a boolean. customer_no is converted to an int and birth_date is parsed
  # with the dd/MM/yyyy pattern.
  parser.filters = {:number_of_kids => Jcsv.optional >> Jcsv.int,
                    :married => Jcsv.optional >> Jcsv.bool,
                    :customer_no => Jcsv.int,
                    :birth_date => Jcsv.date("dd/MM/yyyy")}

  parser.read do |line_no, row_no, row, headers|

    # The file has a header line, so the first data record is row_no 2
    if row_no == 2
      # First field is the customer number, which is converted to an int
      assert_equal(1, row[0])
      assert_equal("John", row[1])
      # Field 5 is :married. It is optional, so leaving it blank (nil) is ok.
      assert_nil(row[5])
    end

    if row_no == 3
      # customer_no is an int, married that was "Y" is now true and number_of_kids
      # is no longer "0" but 0
      assert_equal(2, row[0])
      assert_equal(true, row[5])
      assert_equal(0, row[6])
    end

  end

end
|
150
|
+
|
151
|
+
#-------------------------------------------------------------------------------------
|
152
|
+
#
|
153
|
+
#-------------------------------------------------------------------------------------
|
154
|
+
|
155
|
+
should "Read file in chunks passing a block" do
|
156
|
+
|
157
|
+
# Read chunks of the file. In this case, we are breaking the file in chunks of 2
|
158
|
+
reader = Jcsv.reader("../data/customer.csv", chunk_size: 2)
|
159
|
+
|
160
|
+
# Add filters, so that we get 'objects' instead of strings for filtered fields
|
161
|
+
reader.filters = {:number_of_kids => Jcsv.optional >> Jcsv.int,
|
162
|
+
:married => Jcsv.optional >> Jcsv.bool,
|
163
|
+
:customer_no => Jcsv.int}
|
164
|
+
|
165
|
+
reader.each do |line_no, row_no, chunk, headers|
|
166
|
+
# line_no and row_no are the last read line_no and row_no of the chunk. Since we
|
167
|
+
# have headers and are reading in chunks of two, the first chunk has row_no = 3
|
168
|
+
assert_equal([[1, "John", "Dunbar", "13/06/1945",
|
169
|
+
"1600 Amphitheatre Parkway\nMountain View, CA 94043\nUnited States",
|
170
|
+
nil, nil, "\"May the Force be with you.\" - Star Wars",
|
171
|
+
"jdunbar@gmail.com", "0"],
|
172
|
+
[2, "Bob", "Down", "25/02/1919",
|
173
|
+
"1601 Willow Rd.\nMenlo Park, CA 94025\nUnited States",
|
174
|
+
true, 0, "\"Frankly, my dear, I don't give a damn.\" - Gone With The Wind",
|
175
|
+
"bobdown@hotmail.com", "123456"]], chunk) if row_no == 3
|
176
|
+
end
|
177
|
+
|
178
|
+
end
|
179
|
+
|
180
|
+
|
181
|
+
#-------------------------------------------------------------------------------------
|
182
|
+
#
|
183
|
+
#-------------------------------------------------------------------------------------
|
184
|
+
|
185
|
+
should "Read file in chunks, last chunk smaller" do

  # Read the file in chunks of 3. The file only has 4 data rows, so the first chunk
  # holds 3 rows and the second (last) chunk holds the single remaining row.
  reader = Jcsv.reader("../data/customer.csv", chunk_size: 3)

  # No filters are set, so every field comes back as a string (or nil when blank)
  first_chunk = [["1", "John", "Dunbar", "13/06/1945",
                  "1600 Amphitheatre Parkway\nMountain View, CA 94043\nUnited States",
                  nil, nil,
                  "\"May the Force be with you.\" - Star Wars", "jdunbar@gmail.com", "0"],
                 ["2", "Bob", "Down", "25/02/1919",
                  "1601 Willow Rd.\nMenlo Park, CA 94025\nUnited States",
                  "Y", "0", "\"Frankly, my dear, I don't give a damn.\" - Gone With The Wind",
                  "bobdown@hotmail.com", "123456"],
                 ["3", "Alice", "Wunderland", "08/08/1985",
                  "One Microsoft Way\nRedmond, WA 98052-6399\nUnited States", "Y", "0",
                  "\"Play it, Sam. Play \"As Time Goes By.\"\" - Casablanca",
                  "throughthelookingglass@yahoo.com", "2255887799"]]

  last_chunk = [["4", "Bill", "Jobs", "10/07/1973",
                 "2701 San Tomas Expressway\nSanta Clara, CA 95050\nUnited States", "Y", "3",
                 "\"You've got to ask yourself one question: \"Do I feel lucky?\" Well, do ya, punk?\" - Dirty Harry",
                 "billy34@hotmail.com", "36"]]

  # row_no is the number of the last row read for the chunk; the header line is row 1.
  # The return value of each is not needed when a block is given.
  reader.each do |line_no, row_no, chunk, headers|
    assert_equal(first_chunk, chunk) if row_no == 4
    assert_equal(last_chunk, chunk) if row_no == 5
  end

end
|
214
|
+
|
215
|
+
#-------------------------------------------------------------------------------------
|
216
|
+
#
|
217
|
+
#-------------------------------------------------------------------------------------
|
218
|
+
|
219
|
+
should "Read file in one big chunk" do

  # TODO: add some test cases... no need to go into the tutorial. For now this test
  # only checks that a reader can be created with chunk_size: :all and that filters
  # can be assigned to it; nothing is read and nothing is asserted.

  # chunk_size is :all instead of a number (presumably the whole file is delivered
  # as a single chunk - confirm against the reader implementation)
  reader = Jcsv.reader("../data/customer.csv", chunk_size: :all)

  # Add filters, so that we get 'objects' instead of strings for filtered fields
  reader.filters = {:number_of_kids => Jcsv.optional >> Jcsv.int,
                    :married => Jcsv.optional >> Jcsv.bool,
                    :customer_no => Jcsv.int}
end
|
231
|
+
|
232
|
+
#-------------------------------------------------------------------------------------
|
233
|
+
#
|
234
|
+
#-------------------------------------------------------------------------------------
|
235
|
+
|
236
|
+
should "Read file in chunks as enumerator" do

  reader = Jcsv.reader("../data/customer.csv", chunk_size: 2)

  # Add filters, so that we get 'objects' instead of strings for filtered fields
  reader.filters = {:number_of_kids => Jcsv.optional >> Jcsv.int,
                    :married => Jcsv.optional >> Jcsv.bool,
                    :customer_no => Jcsv.int}

  # Method each without a block returns an enumerator
  enum = reader.each

  # Read the first chunk. Each element produced by the enumerator holds the line_no
  # at index 0, the row_no at index 1 and the chunk data (here 2 rows) at index 2.
  chunk = enum.next

  assert_equal(7, chunk[0])
  assert_equal(3, chunk[1])
  assert_equal([[1, "John", "Dunbar", "13/06/1945",
                 "1600 Amphitheatre Parkway\nMountain View, CA 94043\nUnited States", nil, nil,
                 "\"May the Force be with you.\" - Star Wars", "jdunbar@gmail.com", "0"],
                [2, "Bob", "Down", "25/02/1919",
                 "1601 Willow Rd.\nMenlo Park, CA 94025\nUnited States",
                 true, 0, "\"Frankly, my dear, I don't give a damn.\" - Gone With The Wind",
                 "bobdown@hotmail.com", "123456"]], chunk[2])

  # Read the second (and last) chunk; its content is not checked here
  enum.next

  # trying to read another chunk will raise StopIteration
  assert_raise(StopIteration) { enum.next }

end
|
268
|
+
|
269
|
+
#-------------------------------------------------------------------------------------
|
270
|
+
#
|
271
|
+
#-------------------------------------------------------------------------------------
|
272
|
+
|
273
|
+
should "Read file in chunks as enumerator... last chunk smaller" do

  # Same test with a chunk_size of 3
  reader = Jcsv.reader("../data/customer.csv", chunk_size: 3)

  # Method each without a block returns an enumerator
  enum = reader.each

  # read first chunk. Does nothing with the data.
  enum.next

  # read second chunk... only one row will be returned
  chunk = enum.next

  # Index 0 is the line_no and index 1 the row_no of the last row read: the fourth
  # customer is row 5 (the header is row 1) and its record ends on line 13 of the file.
  assert_equal(13, chunk[0])
  assert_equal(5, chunk[1])
  # No filters are set, so all fields are returned as strings
  assert_equal([["4", "Bill", "Jobs", "10/07/1973",
                 "2701 San Tomas Expressway\nSanta Clara, CA 95050\nUnited States", "Y", "3",
                 "\"You've got to ask yourself one question: \"Do I feel lucky?\" Well, do ya, punk?\" - Dirty Harry",
                 "billy34@hotmail.com", "36"]], chunk[2])

  # trying to read another chunk will raise StopIteration
  assert_raise(StopIteration) { enum.next }

end
|
298
|
+
|
299
|
+
#-------------------------------------------------------------------------------------
|
300
|
+
#
|
301
|
+
#-------------------------------------------------------------------------------------
|
302
|
+
|
303
|
+
should "Read file skipping columns" do
|
304
|
+
|
305
|
+
reader = Jcsv.reader("../data/customer.csv")
|
306
|
+
|
307
|
+
# Add mapping. When column is mapped to false, it will not be retrieved from the
|
308
|
+
# file, improving time and speed efficiency
|
309
|
+
reader.mapping = {:customer_no => false, :number_of_kids => false,
|
310
|
+
:loyalty_points => false}
|
311
|
+
|
312
|
+
reader.read do |line_no, row_no, chunk, headers|
|
313
|
+
assert_equal([:first_name, :last_name, :birth_date, :mailing_address, :married,
|
314
|
+
:favourite_quote, :email], headers)
|
315
|
+
if (row_no == 2)
|
316
|
+
assert_equal("John", chunk[0])
|
317
|
+
assert_equal("Dunbar", chunk[1])
|
318
|
+
end
|
319
|
+
end
|
320
|
+
|
321
|
+
end
|
322
|
+
|
323
|
+
#-------------------------------------------------------------------------------------
|
324
|
+
#
|
325
|
+
#-------------------------------------------------------------------------------------
|
326
|
+
|
327
|
+
should "Read file columns as labels" do
|
328
|
+
|
329
|
+
reader = Jcsv.reader("../data/customer.csv")
|
330
|
+
|
331
|
+
# mapping cannot be to true
|
332
|
+
assert_raise (RuntimeError) {
|
333
|
+
reader.mapping = {:customer_no => true, :number_of_kids => true,
|
334
|
+
:loyalty_points => true}}
|
335
|
+
|
336
|
+
end
|
337
|
+
|
338
|
+
#=end
|
339
|
+
|
340
|
+
#-------------------------------------------------------------------------------------
|
341
|
+
#
|
342
|
+
#-------------------------------------------------------------------------------------
|
343
|
+
|
344
|
+
should "Read file skipping columns, with headers as string" do
|
345
|
+
|
346
|
+
reader = Jcsv.reader("../data/customer.csv", strings_as_keys: true)
|
347
|
+
|
348
|
+
# Add mapping. When column is mapped to false, it will not be retrieved from the
|
349
|
+
# file, improving time and speed efficiency
|
350
|
+
reader.mapping = {"customerNo" => false, "numberOfKids" => false,
|
351
|
+
"loyaltyPoints" => false}
|
352
|
+
|
353
|
+
reader.read do |line_no, row_no, chunk, headers|
|
354
|
+
assert_equal(["firstName", "lastName", "birthDate", "mailingAddress", "married",
|
355
|
+
"favouriteQuote", "email"], headers)
|
356
|
+
if (row_no == 2)
|
357
|
+
assert_equal("John", chunk[0])
|
358
|
+
assert_equal("Dunbar", chunk[1])
|
359
|
+
end
|
360
|
+
end
|
361
|
+
|
362
|
+
end
|
363
|
+
|
364
|
+
#-------------------------------------------------------------------------------------
|
365
|
+
#
|
366
|
+
#-------------------------------------------------------------------------------------
|
367
|
+
|
368
|
+
should "Read file reordering columns" do
|
369
|
+
|
370
|
+
# No 'headers' option is given, so the first line is read as the header (the mapping
|
371
|
+
# below refers to the columns by header name). The file is read in chunks of 2.
|
372
|
+
reader = Jcsv.reader("../data/customer.csv", chunk_size: 2)
|
373
|
+
# NOTE: headers are read in this test, so the disabled assertion below does not apply
|
374
|
+
# assert_equal(false, reader.headers)
|
375
|
+
|
376
|
+
reader.filters = {:number_of_kids => Jcsv.optional >> Jcsv.int,
|
377
|
+
:married => Jcsv.optional >> Jcsv.bool,
|
378
|
+
:customer_no => Jcsv.int}
|
379
|
+
|
380
|
+
# Mapping allows reordering of columns. In this example, column 0 (:customerno)
|
381
|
+
# in the csv file will be loaded in position 2 (3rd column); column 1 (:firstname)
|
382
|
+
# in the csv file will be loaded in position 0 (1st column); column 2 on the csv file
|
383
|
+
# will not be loaded (false); column 4 (:birthdate) will be loaded on position 3,
|
384
|
+
# and so on.
|
385
|
+
# When reordering columns, care should be taken to get the mapping right or unexpected
|
386
|
+
# behaviour could result.
|
387
|
+
reader.mapping = {:customer_no => 2, :first_name => 0, :last_name => false,
|
388
|
+
:birth_date => 3, :mailing_address => false, :married => false,
|
389
|
+
:number_of_kids => false, :favourite_quote => false, :email => 1,
|
390
|
+
:loyalty_points => 4}
|
391
|
+
|
392
|
+
reader.read do |line_no, row_no, chunk, headers|
|
393
|
+
assert_equal([:first_name, :email, :customer_no, :birth_date, :loyalty_points],
|
394
|
+
headers)
|
395
|
+
assert_equal("John", chunk[0][0]) if row_no == 3
|
396
|
+
assert_equal("Alice", chunk[0][0]) if row_no == 5
|
397
|
+
end
|
398
|
+
|
399
|
+
end
|
400
|
+
=begin
|
401
|
+
#-------------------------------------------------------------------------------------
|
402
|
+
# JRuby fiber seems to have a bug. Don't know if only JRuby fiber or fibers in
|
403
|
+
# general. When returning the first element the second is also retrieved (look
|
404
|
+
# forward: might be a reason, but prevents changing the behaviour in between calls to
|
405
|
+
# next.
|
406
|
+
#-------------------------------------------------------------------------------------
|
407
|
+
|
408
|
+
should "allow changing parameters in between reads" do
|
409
|
+
|
410
|
+
p "testing fiber"
|
411
|
+
|
412
|
+
# Start with chunk_size 1
|
413
|
+
reader = Jcsv.reader("../data/customer.csv", headers: true, chunk_size: 1)
|
414
|
+
|
415
|
+
# Method each without a block returns an enumerator
|
416
|
+
enum = reader.each
|
417
|
+
|
418
|
+
# read first chunk. Does nothing with the data. Got only one line of data
|
419
|
+
p enum.next
|
420
|
+
|
421
|
+
# change chunk_size to 2
|
422
|
+
reader.chunk_size = 2
|
423
|
+
|
424
|
+
# read second chunk... only one row will be returned
|
425
|
+
chunk = enum.next
|
426
|
+
p chunk
|
427
|
+
# assert_equal()
|
428
|
+
|
429
|
+
p enum.next
|
430
|
+
|
431
|
+
end
|
432
|
+
=end
|
433
|
+
end
|
434
|
+
|
435
|
+
end
|
436
|
+
|
@@ -0,0 +1,209 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
|
3
|
+
##########################################################################################
|
4
|
+
# Copyright © 2015 Rodrigo Botafogo. All Rights Reserved. Permission to use, copy, modify,
|
5
|
+
# and distribute this software and its documentation for educational, research, and
|
6
|
+
# not-for-profit purposes, without fee and without a signed licensing agreement, is hereby
|
7
|
+
# granted, provided that the above copyright notice, this paragraph and the following two
|
8
|
+
# paragraphs appear in all copies, modifications, and distributions. Contact Rodrigo
|
9
|
+
# Botafogo - rodrigo.a.botafogo@gmail.com for commercial licensing opportunities.
|
10
|
+
#
|
11
|
+
# IN NO EVENT SHALL RODRIGO BOTAFOGO BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL,
|
12
|
+
# INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF
|
13
|
+
# THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF RODRIGO BOTAFOGO HAS BEEN ADVISED OF THE
|
14
|
+
# POSSIBILITY OF SUCH DAMAGE.
|
15
|
+
#
|
16
|
+
# RODRIGO BOTAFOGO SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
|
17
|
+
# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE
|
18
|
+
# SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED HEREUNDER IS PROVIDED "AS IS".
|
19
|
+
# RODRIGO BOTAFOGO HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS,
|
20
|
+
# OR MODIFICATIONS.
|
21
|
+
##########################################################################################
|
22
|
+
|
23
|
+
require 'rubygems'
|
24
|
+
require 'test/unit'
|
25
|
+
require 'shoulda'
|
26
|
+
require 'date'
|
27
|
+
|
28
|
+
require_relative '../config' if !@platform
|
29
|
+
|
30
|
+
require 'jcsv'
|
31
|
+
|
32
|
+
class CSVTest < Test::Unit::TestCase
|
33
|
+
|
34
|
+
context "CSV test" do
|
35
|
+
|
36
|
+
setup do
|
37
|
+
|
38
|
+
end
|
39
|
+
|
40
|
+
#-------------------------------------------------------------------------------------
|
41
|
+
#
|
42
|
+
#-------------------------------------------------------------------------------------
|
43
|
+
|
44
|
+
should "parse a csv file to map the quick way" do
|
45
|
+
|
46
|
+
reader = Jcsv.reader("../data/customer.csv", format: :map)
|
47
|
+
# map is an array of hashes
|
48
|
+
map = reader.read
|
49
|
+
|
50
|
+
# get customerNo of second row
|
51
|
+
assert_equal("2", map[1][:customer_no])
|
52
|
+
# loyaltyPoints from 4th row
|
53
|
+
assert_equal("36", map[3][:loyalty_points])
|
54
|
+
|
55
|
+
end
|
56
|
+
|
57
|
+
#-------------------------------------------------------------------------------------
|
58
|
+
#
|
59
|
+
#-------------------------------------------------------------------------------------
|
60
|
+
|
61
|
+
should "parse a csv file to map without filters nor mappings in chunks" do
|
62
|
+
|
63
|
+
# format is :map, so rows are hashes. No filters or mapping are set, so every field is
|
64
|
+
# returned as a string. Keys are strings because strings_as_keys is true.
|
65
|
+
reader = Jcsv.reader("../data/customer.csv", format: :map, chunk_size: 2,
|
66
|
+
strings_as_keys: true)
|
67
|
+
|
68
|
+
map = reader.read
|
69
|
+
|
70
|
+
# since chunk_size = 2, but we didn't pass a block to reader, we will get back
|
71
|
+
# 1 array, with 2 arrays each with a chunk. Every element of the internal arrays
|
72
|
+
# are maps (hashes)
|
73
|
+
|
74
|
+
# Below we are looking at the second chunk, element 0. Since our chunks are of
|
75
|
+
# size 2, the second chunk, element 0 is the third row.
|
76
|
+
assert_equal("2255887799", map[1][0]["loyaltyPoints"])
|
77
|
+
|
78
|
+
end
|
79
|
+
|
80
|
+
#-------------------------------------------------------------------------------------
|
81
|
+
#
|
82
|
+
#-------------------------------------------------------------------------------------
|
83
|
+
|
84
|
+
should "parse a csv file to map" do

  # format is :map, so rows are hashes. The default filter is not_nil: all fields
  # are required unless explicitly set to optional.
  reader = Jcsv.reader("../data/customer.csv", format: :map, default_filter: Jcsv.not_nil)

  # Set number_of_kids and married as optional, otherwise an exception will be raised
  # for the record in which they are blank
  reader.filters = {:number_of_kids => Jcsv.optional >> Jcsv.int,
                    :married => Jcsv.optional >> Jcsv.bool,
                    :loyalty_points => Jcsv.long,
                    :customer_no => Jcsv.int,
                    :birth_date => Jcsv.date("dd/MM/yyyy")}

  # When parsing to map, a mapping renames the keys of the returned hash; the new key
  # can be a symbol or a string. A column mapped to false is removed from the row.
  reader.mapping = {:number_of_kids => :numero_criancas,
                    :married => "casado",
                    :loyalty_points => "pontos fidelidade",
                    :customer_no => false}

  reader.read do |line_no, row_no, row, headers|
    # Only the last record (row 5, since the header is row 1) is checked
    next unless row_no == 5

    # customer_no was mapped to false, so the key is absent from the hash
    assert_nil(row[:customer_no])
    assert_equal("Bill", row[:first_name])
    assert_equal(true, row["casado"])
    assert_equal("1973-07-10T00:00:00+00:00", row[:birth_date].to_s)
    assert_equal("2701 San Tomas Expressway\nSanta Clara, CA 95050\nUnited States",
                 row[:mailing_address])
    assert_equal(3, row[:numero_criancas])
  end

end
|
118
|
+
|
119
|
+
#-------------------------------------------------------------------------------------
|
120
|
+
#
|
121
|
+
#-------------------------------------------------------------------------------------
|
122
|
+
|
123
|
+
should "raise exception if no header when reading map" do
|
124
|
+
|
125
|
+
# Will raise an exception as reading a file as map requires the header
|
126
|
+
assert_raise ( Jcsv::MissingHeadersError ) {
|
127
|
+
Jcsv.reader("../data/customer.csv", format: :map, headers: false)
|
128
|
+
}
|
129
|
+
|
130
|
+
end
|
131
|
+
|
132
|
+
#-------------------------------------------------------------------------------------
|
133
|
+
#
|
134
|
+
#-------------------------------------------------------------------------------------
|
135
|
+
|
136
|
+
should "raise ConstraintViolation" do

  reader = Jcsv.reader("../data/customer.csv", format: :map, default_filter: Jcsv.not_nil,
                       headers: true, strings_as_keys: true)

  # Only numberOfKids is set as optional. 'married' is deliberately left without a
  # filter of its own, so the not_nil default filter applies to it.
  # The date pattern uses MM (month), as the other tests do; lowercase mm would
  # presumably be read as minutes by the underlying date parser.
  reader.filters = {"numberOfKids" => Jcsv.optional >> Jcsv.int,
                    "loyaltyPoints" => Jcsv.long,
                    "customerNo" => Jcsv.int,
                    "birthDate" => Jcsv.date("dd/MM/yyyy")}

  # Will raise an exception, as the default_filter is not_nil and there is a record
  # in which field 'married' is nil
  assert_raise(Jcsv::ConstraintViolation) do
    reader.read { |line_no, row_no, row, headers| }
  end

end
|
154
|
+
|
155
|
+
#-------------------------------------------------------------------------------------
|
156
|
+
#
|
157
|
+
#-------------------------------------------------------------------------------------
|
158
|
+
|
159
|
+
should "catch FilterError" do

  reader = Jcsv.reader("../data/customer.csv", format: :map, headers: true,
                       strings_as_keys: true)

  # loyaltyPoints is filtered with Jcsv.bool although the column holds values such as
  # "123456"; presumably this is what makes the reader raise Jcsv::FilterError.
  # The date pattern uses MM (month), as the other tests do; lowercase mm would
  # presumably be read as minutes by the underlying date parser.
  reader.filters = {"numberOfKids" => Jcsv.optional >> Jcsv.int,
                    "loyaltyPoints" => Jcsv.bool,
                    "customerNo" => Jcsv.int,
                    "birthDate" => Jcsv.date("dd/MM/yyyy")}

  # The read is retried after every FilterError. The number of attempts is bounded so
  # that the test fails instead of looping forever if every attempt raises again.
  max_attempts = 10
  attempts = 0

  begin
    reader.read do |line_no, row_no, row, headers|
      p row
    end
  rescue Jcsv::FilterError => e
    puts e.message
    attempts += 1
    # Give up and let the error fail the test once the bound is reached
    raise if attempts >= max_attempts
    retry
  end

end
|
179
|
+
|
180
|
+
#-------------------------------------------------------------------------------------
|
181
|
+
#
|
182
|
+
#-------------------------------------------------------------------------------------
|
183
|
+
|
184
|
+
should "Read file in chunks passing a block as iterator" do
|
185
|
+
|
186
|
+
# Read chunks of the file. In this case, we are breaking the file in chunks of two
|
187
|
+
reader = Jcsv.reader("../data/customer.csv", chunk_size: 2, format: :map,
|
188
|
+
strings_as_keys: true)
|
189
|
+
|
190
|
+
# Add filters, so that we get 'objects' instead of strings for filtered fields
|
191
|
+
reader.filters = {"numberOfKids" => Jcsv.optional >> Jcsv.int,
|
192
|
+
"married" => Jcsv.optional >> Jcsv.bool,
|
193
|
+
"customerNo" => Jcsv.int}
|
194
|
+
|
195
|
+
iter = reader.each
|
196
|
+
chunk1 = iter.next
|
197
|
+
# 3rd item in the chunk1 array is the data. 1st item is the line_no and 2nd item
|
198
|
+
# row_no. Chunks are of size 2, so chunk1[2][1] is the second element of the first
|
199
|
+
# chunk
|
200
|
+
assert_equal(2, chunk1[2][1]["customerNo"])
|
201
|
+
assert_equal("Down", chunk1[2][1]["lastName"])
|
202
|
+
|
203
|
+
chunk2 = iter.next
|
204
|
+
|
205
|
+
end
|
206
|
+
|
207
|
+
end
|
208
|
+
|
209
|
+
end
|