mdarray-jcsv 0.6.3-java
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/LICENSE.txt +23 -0
- data/README.md +2 -0
- data/Rakefile +46 -0
- data/config.rb +104 -0
- data/lib/constraints.rb +205 -0
- data/lib/date_filters.rb +252 -0
- data/lib/dimensions.rb +276 -0
- data/lib/filters.rb +332 -0
- data/lib/jcsv.rb +107 -0
- data/lib/list_reader.rb +200 -0
- data/lib/locale.rb +192 -0
- data/lib/map_reader.rb +192 -0
- data/lib/mdarray-jcsv.rb +24 -0
- data/lib/mdarray_reader.rb +110 -0
- data/lib/numeric_filters.rb +225 -0
- data/lib/reader.rb +547 -0
- data/lib/supercsv_interface.rb +231 -0
- data/test/test_complete.rb +37 -0
- data/test/test_critbit.rb +442 -0
- data/test/test_customer_list.rb +436 -0
- data/test/test_customer_map.rb +209 -0
- data/test/test_customer_nhlist.rb +161 -0
- data/test/test_deep_map.rb +264 -0
- data/test/test_del.rb +73 -0
- data/test/test_dimensions.rb +231 -0
- data/test/test_example.rb +79 -0
- data/test/test_filters.rb +374 -0
- data/test/test_list_dimensions.rb +110 -0
- data/test/test_mdarray.rb +227 -0
- data/test/test_missing_data.rb +57 -0
- data/vendor/commons-beanutils-1.8.3.jar +0 -0
- data/vendor/commons-lang3-3.1.jar +0 -0
- data/vendor/dozer-5.4.0.jar +0 -0
- data/vendor/jcl-over-slf4j-1.6.6.jar +0 -0
- data/vendor/joda-time-2.7.jar +0 -0
- data/vendor/slf4j-api-1.7.5.jar +0 -0
- data/vendor/snakeyaml-1.14.jar +0 -0
- data/vendor/super-csv-2.4.0.jar +0 -0
- data/vendor/super-csv-dozer-2.4.0.jar +0 -0
- data/vendor/super-csv-java8-2.4.0.jar +0 -0
- data/vendor/super-csv-joda-2.4.0.jar +0 -0
- data/version.rb +2 -0
- metadata +196 -0
@@ -0,0 +1,436 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
|
3
|
+
##########################################################################################
|
4
|
+
# Copyright © 2015 Rodrigo Botafogo. All Rights Reserved. Permission to use, copy, modify,
|
5
|
+
# and distribute this software and its documentation for educational, research, and
|
6
|
+
# not-for-profit purposes, without fee and without a signed licensing agreement, is hereby
|
7
|
+
# granted, provided that the above copyright notice, this paragraph and the following two
|
8
|
+
# paragraphs appear in all copies, modifications, and distributions. Contact Rodrigo
|
9
|
+
# Botafogo - rodrigo.a.botafogo@gmail.com for commercial licensing opportunities.
|
10
|
+
#
|
11
|
+
# IN NO EVENT SHALL RODRIGO BOTAFOGO BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL,
|
12
|
+
# INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF
|
13
|
+
# THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF RODRIGO BOTAFOGO HAS BEEN ADVISED OF THE
|
14
|
+
# POSSIBILITY OF SUCH DAMAGE.
|
15
|
+
#
|
16
|
+
# RODRIGO BOTAFOGO SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
|
17
|
+
# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE
|
18
|
+
# SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED HEREUNDER IS PROVIDED "AS IS".
|
19
|
+
# RODRIGO BOTAFOGO HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS,
|
20
|
+
# OR MODIFICATIONS.
|
21
|
+
##########################################################################################
|
22
|
+
|
23
|
+
require 'rubygems'
|
24
|
+
require 'test/unit'
|
25
|
+
require 'shoulda'
|
26
|
+
|
27
|
+
require_relative '../config'
|
28
|
+
require 'jcsv'
|
29
|
+
|
30
|
+
class CSVTest < Test::Unit::TestCase
|
31
|
+
|
32
|
+
context "CSV test" do
|
33
|
+
|
34
|
+
setup do
|
35
|
+
|
36
|
+
end
|
37
|
+
|
38
|
+
#=begin
|
39
|
+
#-------------------------------------------------------------------------------------
|
40
|
+
#
|
41
|
+
#-------------------------------------------------------------------------------------
|
42
|
+
|
43
|
+
should "parse a csv file the quick way with headers" do
|
44
|
+
|
45
|
+
# Reads all rows into memory and returns an array of arrays. Each line is stored in
|
46
|
+
# one array. Data is stored in the 'rows' instance variable.
|
47
|
+
# Create the reader with all default parameters. Headers are converted from string
|
48
|
+
# to symbol
|
49
|
+
reader = Jcsv.reader("../data/customer.csv")
|
50
|
+
|
51
|
+
# now read the whole csv file
|
52
|
+
content = reader.read
|
53
|
+
|
54
|
+
# Headers are converted to symbol
|
55
|
+
assert_equal([:customer_no, :first_name, :last_name, :birth_date, :mailing_address,
|
56
|
+
:married, :number_of_kids, :favourite_quote, :email, :loyalty_points],
|
57
|
+
reader.headers)
|
58
|
+
|
59
|
+
assert_equal(["1", "John", "Dunbar", "13/06/1945",
|
60
|
+
"1600 Amphitheatre Parkway\nMountain View, CA 94043\nUnited States",
|
61
|
+
nil, nil, "\"May the Force be with you.\" - Star Wars",
|
62
|
+
"jdunbar@gmail.com", "0"], content[0])
|
63
|
+
|
64
|
+
end
|
65
|
+
|
66
|
+
#-------------------------------------------------------------------------------------
|
67
|
+
#
|
68
|
+
#-------------------------------------------------------------------------------------
|
69
|
+
|
70
|
+
should "leave headers as string" do
|
71
|
+
|
72
|
+
# Reads all rows into memory and returns an array of arrays. Each line is stored in
|
73
|
+
# one array. Data is stored in the 'rows' instance variable.
|
74
|
+
# Headers are kept as strings instead of symbol
|
75
|
+
reader = Jcsv.reader("../data/customer.csv", strings_as_keys: true)
|
76
|
+
|
77
|
+
# now read the whole csv file
|
78
|
+
content = reader.read
|
79
|
+
|
80
|
+
assert_equal(["customerNo", "firstName", "lastName", "birthDate", "mailingAddress",
|
81
|
+
"married", "numberOfKids", "favouriteQuote", "email", "loyaltyPoints"],
|
82
|
+
reader.headers)
|
83
|
+
|
84
|
+
end
|
85
|
+
|
86
|
+
#-------------------------------------------------------------------------------------
|
87
|
+
#
|
88
|
+
#-------------------------------------------------------------------------------------
|
89
|
+
|
90
|
+
should "parse a csv file passing a block" do
|
91
|
+
|
92
|
+
# read lines and pass them to a block for processing. The block receives the
|
93
|
+
# line_no (last line of the record), row_no, row and the headers. If the 'headers' option is
|
94
|
+
# false, then headers will be nil. Instead of
|
95
|
+
# method foreach, one could also use method 'read' with a block. 'read' and
|
96
|
+
# 'foreach' are identical.
|
97
|
+
reader = Jcsv.reader("../data/customer.csv", headers: true, strings_as_keys: true)
|
98
|
+
|
99
|
+
reader.read do |line_no, row_no, row, headers|
|
100
|
+
assert_equal(4, line_no) if row_no == 2
|
101
|
+
assert_equal(7, line_no) if row_no == 3
|
102
|
+
assert_equal(10, line_no) if row_no == 4
|
103
|
+
assert_equal(13, line_no) if row_no == 5
|
104
|
+
|
105
|
+
assert_equal(["customerNo", "firstName", "lastName", "birthDate",
|
106
|
+
"mailingAddress", "married", "numberOfKids", "favouriteQuote",
|
107
|
+
"email", "loyaltyPoints"], headers)
|
108
|
+
|
109
|
+
# Since the file has a header, the third record is row_no = 4
|
110
|
+
assert_equal(["3", "Alice", "Wunderland",
|
111
|
+
"08/08/1985", "One Microsoft Way\nRedmond, WA 98052-6399\nUnited States",
|
112
|
+
"Y", "0", "\"Play it, Sam. Play \"As Time Goes By.\"\" - Casablanca",
|
113
|
+
"throughthelookingglass@yahoo.com", "2255887799"], row) if row_no == 4
|
114
|
+
end
|
115
|
+
|
116
|
+
end
|
117
|
+
|
118
|
+
#-------------------------------------------------------------------------------------
|
119
|
+
#
|
120
|
+
#-------------------------------------------------------------------------------------
|
121
|
+
|
122
|
+
should "parse a csv file with filters" do
|
123
|
+
|
124
|
+
# Add filters, to filter the columns according to given rules. numberOfKids is
|
125
|
+
# optional and should be converted to an int. married is optional and should be
|
126
|
+
# converted to a boolean
|
127
|
+
parser = Jcsv.reader("../data/customer.csv", default_filter: Jcsv.not_nil)
|
128
|
+
|
129
|
+
# Add filters, so that we get 'objects' instead of strings for filtered fields
|
130
|
+
parser.filters = {:number_of_kids => Jcsv.optional >> Jcsv.int,
|
131
|
+
:married => Jcsv.optional >> Jcsv.bool,
|
132
|
+
:customer_no => Jcsv.int,
|
133
|
+
:birth_date => Jcsv.date("dd/MM/yyyy")}
|
134
|
+
|
135
|
+
parser.read do |line_no, row_no, row, headers|
|
136
|
+
|
137
|
+
# First field is customer number, which is converted to int
|
138
|
+
assert_equal(1, row[0]) if row_no == 2
|
139
|
+
assert_equal("John", row[1]) if row_no == 2
|
140
|
+
# Field 5 is :married. It is optional, so leaving it blank (nil) is ok.
|
141
|
+
assert_equal(nil, row[5]) if row_no == 2
|
142
|
+
|
143
|
+
# notice that field married that was "Y" is now true. Number of kids is not "0",
|
144
|
+
# but 0, customerNo is also an int
|
145
|
+
assert_equal(true, row[5]) if row_no == 3
|
146
|
+
|
147
|
+
end
|
148
|
+
|
149
|
+
end
|
150
|
+
|
151
|
+
#-------------------------------------------------------------------------------------
|
152
|
+
#
|
153
|
+
#-------------------------------------------------------------------------------------
|
154
|
+
|
155
|
+
should "Read file in chunks passing a block" do
|
156
|
+
|
157
|
+
# Read chunks of the file. In this case, we are breaking the file in chunks of 2
|
158
|
+
reader = Jcsv.reader("../data/customer.csv", chunk_size: 2)
|
159
|
+
|
160
|
+
# Add filters, so that we get 'objects' instead of strings for filtered fields
|
161
|
+
reader.filters = {:number_of_kids => Jcsv.optional >> Jcsv.int,
|
162
|
+
:married => Jcsv.optional >> Jcsv.bool,
|
163
|
+
:customer_no => Jcsv.int}
|
164
|
+
|
165
|
+
reader.each do |line_no, row_no, chunk, headers|
|
166
|
+
# line_no and row_no are the last read line_no and row_no of the chunk. Since we
|
167
|
+
# have headers and are reading in chunks of two, the first chunk has row_no = 3
|
168
|
+
assert_equal([[1, "John", "Dunbar", "13/06/1945",
|
169
|
+
"1600 Amphitheatre Parkway\nMountain View, CA 94043\nUnited States",
|
170
|
+
nil, nil, "\"May the Force be with you.\" - Star Wars",
|
171
|
+
"jdunbar@gmail.com", "0"],
|
172
|
+
[2, "Bob", "Down", "25/02/1919",
|
173
|
+
"1601 Willow Rd.\nMenlo Park, CA 94025\nUnited States",
|
174
|
+
true, 0, "\"Frankly, my dear, I don't give a damn.\" - Gone With The Wind",
|
175
|
+
"bobdown@hotmail.com", "123456"]], chunk) if row_no == 3
|
176
|
+
end
|
177
|
+
|
178
|
+
end
|
179
|
+
|
180
|
+
|
181
|
+
#-------------------------------------------------------------------------------------
|
182
|
+
#
|
183
|
+
#-------------------------------------------------------------------------------------
|
184
|
+
|
185
|
+
should "Read file in chunks, last chunk smaller" do
|
186
|
+
|
187
|
+
# Read chunks of the file. In this case, we are breaking the file in chunks of 3.
|
188
|
+
# Since we only have 4 rows, the first chunk will have 3 rows and the second chunk
|
189
|
+
# will have 1 row
|
190
|
+
reader = Jcsv.reader("../data/customer.csv", chunk_size: 3)
|
191
|
+
|
192
|
+
enum = reader.each do |line_no, row_no, chunk, headers|
|
193
|
+
assert_equal([["1", "John", "Dunbar", "13/06/1945",
|
194
|
+
"1600 Amphitheatre Parkway\nMountain View, CA 94043\nUnited States",
|
195
|
+
nil, nil,
|
196
|
+
"\"May the Force be with you.\" - Star Wars", "jdunbar@gmail.com", "0"],
|
197
|
+
["2", "Bob", "Down", "25/02/1919",
|
198
|
+
"1601 Willow Rd.\nMenlo Park, CA 94025\nUnited States",
|
199
|
+
"Y", "0", "\"Frankly, my dear, I don't give a damn.\" - Gone With The Wind",
|
200
|
+
"bobdown@hotmail.com", "123456"],
|
201
|
+
["3", "Alice", "Wunderland", "08/08/1985",
|
202
|
+
"One Microsoft Way\nRedmond, WA 98052-6399\nUnited States", "Y", "0",
|
203
|
+
"\"Play it, Sam. Play \"As Time Goes By.\"\" - Casablanca",
|
204
|
+
"throughthelookingglass@yahoo.com", "2255887799"]], chunk) if row_no == 4
|
205
|
+
|
206
|
+
assert_equal([["4", "Bill", "Jobs", "10/07/1973",
|
207
|
+
"2701 San Tomas Expressway\nSanta Clara, CA 95050\nUnited States", "Y", "3",
|
208
|
+
"\"You've got to ask yourself one question: \"Do I feel lucky?\" Well, do ya, punk?\" - Dirty Harry",
|
209
|
+
"billy34@hotmail.com", "36"]], chunk) if row_no == 5
|
210
|
+
|
211
|
+
end
|
212
|
+
|
213
|
+
end
|
214
|
+
|
215
|
+
#-------------------------------------------------------------------------------------
|
216
|
+
#
|
217
|
+
#-------------------------------------------------------------------------------------
|
218
|
+
|
219
|
+
should "Read file in one big chunk" do
|
220
|
+
|
221
|
+
p "TODO: add some test cases... no need to go into the tutorial."
|
222
|
+
|
223
|
+
# Read the whole file as one big chunk (chunk_size: :all) instead of fixed-size chunks
|
224
|
+
reader = Jcsv.reader("../data/customer.csv", chunk_size: :all)
|
225
|
+
|
226
|
+
# Add filters, so that we get 'objects' instead of strings for filtered fields
|
227
|
+
reader.filters = {:number_of_kids => Jcsv.optional >> Jcsv.int,
|
228
|
+
:married => Jcsv.optional >> Jcsv.bool,
|
229
|
+
:customer_no => Jcsv.int}
|
230
|
+
end
|
231
|
+
|
232
|
+
#-------------------------------------------------------------------------------------
|
233
|
+
#
|
234
|
+
#-------------------------------------------------------------------------------------
|
235
|
+
|
236
|
+
should "Read file in chunks as enumerator" do
|
237
|
+
|
238
|
+
reader = Jcsv.reader("../data/customer.csv", chunk_size: 2)
|
239
|
+
|
240
|
+
# Add filters, so that we get 'objects' instead of strings for filtered fields
|
241
|
+
reader.filters = {:number_of_kids => Jcsv.optional >> Jcsv.int,
|
242
|
+
:married => Jcsv.optional >> Jcsv.bool,
|
243
|
+
:customer_no => Jcsv.int}
|
244
|
+
|
245
|
+
# Method each without a block returns an enumerator
|
246
|
+
enum = reader.each
|
247
|
+
|
248
|
+
# read the first chunk. Chunk is of size 2
|
249
|
+
chunk = enum.next
|
250
|
+
|
251
|
+
assert_equal(7, chunk[0])
|
252
|
+
assert_equal(3, chunk[1])
|
253
|
+
assert_equal([[1, "John", "Dunbar", "13/06/1945",
|
254
|
+
"1600 Amphitheatre Parkway\nMountain View, CA 94043\nUnited States", nil, nil,
|
255
|
+
"\"May the Force be with you.\" - Star Wars", "jdunbar@gmail.com", "0"],
|
256
|
+
[2, "Bob", "Down", "25/02/1919",
|
257
|
+
"1601 Willow Rd.\nMenlo Park, CA 94025\nUnited States",
|
258
|
+
true, 0, "\"Frankly, my dear, I don't give a damn.\" - Gone With The Wind",
|
259
|
+
"bobdown@hotmail.com", "123456"]], chunk[2])
|
260
|
+
|
261
|
+
# read second chunk
|
262
|
+
c = enum.next
|
263
|
+
|
264
|
+
# trying to read another chunk will raise StopIteration
|
265
|
+
assert_raise ( StopIteration ) { enum.next }
|
266
|
+
|
267
|
+
end
|
268
|
+
|
269
|
+
#-------------------------------------------------------------------------------------
|
270
|
+
#
|
271
|
+
#-------------------------------------------------------------------------------------
|
272
|
+
|
273
|
+
should "Read file in chunks as enumerator... last chunk smaller" do
|
274
|
+
|
275
|
+
# Same test with a chunk_size of 3
|
276
|
+
reader = Jcsv.reader("../data/customer.csv", chunk_size: 3)
|
277
|
+
|
278
|
+
# Method each without a block returns an enumerator
|
279
|
+
enum = reader.each
|
280
|
+
|
281
|
+
# read first chunk. Does nothing with the data.
|
282
|
+
enum.next
|
283
|
+
|
284
|
+
|
285
|
+
# read second chunk... only one row will be returned
|
286
|
+
chunk = enum.next
|
287
|
+
|
288
|
+
# assert_equal()
|
289
|
+
assert_equal([["4", "Bill", "Jobs", "10/07/1973",
|
290
|
+
"2701 San Tomas Expressway\nSanta Clara, CA 95050\nUnited States", "Y", "3",
|
291
|
+
"\"You've got to ask yourself one question: \"Do I feel lucky?\" Well, do ya, punk?\" - Dirty Harry",
|
292
|
+
"billy34@hotmail.com", "36"]], chunk[2])
|
293
|
+
|
294
|
+
# trying to read another chunk will raise StopIteration
|
295
|
+
assert_raise ( StopIteration ) { enum.next }
|
296
|
+
|
297
|
+
end
|
298
|
+
|
299
|
+
#-------------------------------------------------------------------------------------
|
300
|
+
#
|
301
|
+
#-------------------------------------------------------------------------------------
|
302
|
+
|
303
|
+
should "Read file skipping columns" do
|
304
|
+
|
305
|
+
reader = Jcsv.reader("../data/customer.csv")
|
306
|
+
|
307
|
+
# Add mapping. When column is mapped to false, it will not be retrieved from the
|
308
|
+
# file, improving time and speed efficiency
|
309
|
+
reader.mapping = {:customer_no => false, :number_of_kids => false,
|
310
|
+
:loyalty_points => false}
|
311
|
+
|
312
|
+
reader.read do |line_no, row_no, chunk, headers|
|
313
|
+
assert_equal([:first_name, :last_name, :birth_date, :mailing_address, :married,
|
314
|
+
:favourite_quote, :email], headers)
|
315
|
+
if (row_no == 2)
|
316
|
+
assert_equal("John", chunk[0])
|
317
|
+
assert_equal("Dunbar", chunk[1])
|
318
|
+
end
|
319
|
+
end
|
320
|
+
|
321
|
+
end
|
322
|
+
|
323
|
+
#-------------------------------------------------------------------------------------
|
324
|
+
#
|
325
|
+
#-------------------------------------------------------------------------------------
|
326
|
+
|
327
|
+
should "Read file columns as labels" do
|
328
|
+
|
329
|
+
reader = Jcsv.reader("../data/customer.csv")
|
330
|
+
|
331
|
+
# mapping cannot be to true
|
332
|
+
assert_raise (RuntimeError) {
|
333
|
+
reader.mapping = {:customer_no => true, :number_of_kids => true,
|
334
|
+
:loyalty_points => true}}
|
335
|
+
|
336
|
+
end
|
337
|
+
|
338
|
+
#=end
|
339
|
+
|
340
|
+
#-------------------------------------------------------------------------------------
|
341
|
+
#
|
342
|
+
#-------------------------------------------------------------------------------------
|
343
|
+
|
344
|
+
should "Read file skipping columns, with headers as string" do
|
345
|
+
|
346
|
+
reader = Jcsv.reader("../data/customer.csv", strings_as_keys: true)
|
347
|
+
|
348
|
+
# Add mapping. When column is mapped to false, it will not be retrieved from the
|
349
|
+
# file, improving time and speed efficiency
|
350
|
+
reader.mapping = {"customerNo" => false, "numberOfKids" => false,
|
351
|
+
"loyaltyPoints" => false}
|
352
|
+
|
353
|
+
reader.read do |line_no, row_no, chunk, headers|
|
354
|
+
assert_equal(["firstName", "lastName", "birthDate", "mailingAddress", "married",
|
355
|
+
"favouriteQuote", "email"], headers)
|
356
|
+
if (row_no == 2)
|
357
|
+
assert_equal("John", chunk[0])
|
358
|
+
assert_equal("Dunbar", chunk[1])
|
359
|
+
end
|
360
|
+
end
|
361
|
+
|
362
|
+
end
|
363
|
+
|
364
|
+
#-------------------------------------------------------------------------------------
|
365
|
+
#
|
366
|
+
#-------------------------------------------------------------------------------------
|
367
|
+
|
368
|
+
should "Read file reordering columns" do
|
369
|
+
|
370
|
+
# Headers are not disabled here (no headers: false is passed below), so the first line is still treated as
|
371
|
+
# a header.
|
372
|
+
reader = Jcsv.reader("../data/customer.csv", chunk_size: 2)
|
373
|
+
# reading the headers returns false
|
374
|
+
# assert_equal(false, reader.headers)
|
375
|
+
|
376
|
+
reader.filters = {:number_of_kids => Jcsv.optional >> Jcsv.int,
|
377
|
+
:married => Jcsv.optional >> Jcsv.bool,
|
378
|
+
:customer_no => Jcsv.int}
|
379
|
+
|
380
|
+
# Mapping allows reordering of columns. In this example, column 0 (:customer_no)
|
381
|
+
# in the csv file will be loaded in position 2 (3rd column); column 1 (:first_name)
|
382
|
+
# in the csv file will be loaded in position 0 (1st column); column 2 on the csv file
|
383
|
+
# will not be loaded (false); column 3 (:birth_date) will be loaded in position 3,
|
384
|
+
# and so on.
|
385
|
+
# When reordering columns, care should be taken to get the mapping right or unexpected
|
386
|
+
# behaviour could result.
|
387
|
+
reader.mapping = {:customer_no => 2, :first_name => 0, :last_name => false,
|
388
|
+
:birth_date => 3, :mailing_address => false, :married => false,
|
389
|
+
:number_of_kids => false, :favourite_quote => false, :email => 1,
|
390
|
+
:loyalty_points => 4}
|
391
|
+
|
392
|
+
reader.read do |line_no, row_no, chunk, headers|
|
393
|
+
assert_equal([:first_name, :email, :customer_no, :birth_date, :loyalty_points],
|
394
|
+
headers)
|
395
|
+
assert_equal("John", chunk[0][0]) if row_no == 3
|
396
|
+
assert_equal("Alice", chunk[0][0]) if row_no == 5
|
397
|
+
end
|
398
|
+
|
399
|
+
end
|
400
|
+
=begin
|
401
|
+
#-------------------------------------------------------------------------------------
|
402
|
+
# JRuby fiber seems to have a bug. Don't know if only JRuby fiber or fibers in
|
403
|
+
# general. When returning the first element the second is also retrieved (look
|
404
|
+
# forward: might be a reason, but prevents changing the behaviour in between calls to
|
405
|
+
# next).
|
406
|
+
#-------------------------------------------------------------------------------------
|
407
|
+
|
408
|
+
should "allow changing parameters in between reads" do
|
409
|
+
|
410
|
+
p "testing fiber"
|
411
|
+
|
412
|
+
# Start with chunk_size 1
|
413
|
+
reader = Jcsv.reader("../data/customer.csv", headers: true, chunk_size: 1)
|
414
|
+
|
415
|
+
# Method each without a block returns an enumerator
|
416
|
+
enum = reader.each
|
417
|
+
|
418
|
+
# read first chunk. Does nothing with the data. Got only one line of data
|
419
|
+
p enum.next
|
420
|
+
|
421
|
+
# change chunk_size to 2
|
422
|
+
reader.chunk_size = 2
|
423
|
+
|
424
|
+
# read second chunk... only one row will be returned
|
425
|
+
chunk = enum.next
|
426
|
+
p chunk
|
427
|
+
# assert_equal()
|
428
|
+
|
429
|
+
p enum.next
|
430
|
+
|
431
|
+
end
|
432
|
+
=end
|
433
|
+
end
|
434
|
+
|
435
|
+
end
|
436
|
+
|
@@ -0,0 +1,209 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
|
3
|
+
##########################################################################################
|
4
|
+
# Copyright © 2015 Rodrigo Botafogo. All Rights Reserved. Permission to use, copy, modify,
|
5
|
+
# and distribute this software and its documentation for educational, research, and
|
6
|
+
# not-for-profit purposes, without fee and without a signed licensing agreement, is hereby
|
7
|
+
# granted, provided that the above copyright notice, this paragraph and the following two
|
8
|
+
# paragraphs appear in all copies, modifications, and distributions. Contact Rodrigo
|
9
|
+
# Botafogo - rodrigo.a.botafogo@gmail.com for commercial licensing opportunities.
|
10
|
+
#
|
11
|
+
# IN NO EVENT SHALL RODRIGO BOTAFOGO BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL,
|
12
|
+
# INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF
|
13
|
+
# THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF RODRIGO BOTAFOGO HAS BEEN ADVISED OF THE
|
14
|
+
# POSSIBILITY OF SUCH DAMAGE.
|
15
|
+
#
|
16
|
+
# RODRIGO BOTAFOGO SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
|
17
|
+
# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE
|
18
|
+
# SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED HEREUNDER IS PROVIDED "AS IS".
|
19
|
+
# RODRIGO BOTAFOGO HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS,
|
20
|
+
# OR MODIFICATIONS.
|
21
|
+
##########################################################################################
|
22
|
+
|
23
|
+
require 'rubygems'
|
24
|
+
require 'test/unit'
|
25
|
+
require 'shoulda'
|
26
|
+
require 'date'
|
27
|
+
|
28
|
+
require_relative '../config' if !@platform
|
29
|
+
|
30
|
+
require 'jcsv'
|
31
|
+
|
32
|
+
class CSVTest < Test::Unit::TestCase
|
33
|
+
|
34
|
+
context "CSV test" do
|
35
|
+
|
36
|
+
setup do
|
37
|
+
|
38
|
+
end
|
39
|
+
|
40
|
+
#-------------------------------------------------------------------------------------
|
41
|
+
#
|
42
|
+
#-------------------------------------------------------------------------------------
|
43
|
+
|
44
|
+
should "parse a csv file to map the quick way" do
|
45
|
+
|
46
|
+
reader = Jcsv.reader("../data/customer.csv", format: :map)
|
47
|
+
# map is an array of hashes
|
48
|
+
map = reader.read
|
49
|
+
|
50
|
+
# get customerNo of second row
|
51
|
+
assert_equal("2", map[1][:customer_no])
|
52
|
+
# loyaltyPoints from 4th row
|
53
|
+
assert_equal("36", map[3][:loyalty_points])
|
54
|
+
|
55
|
+
end
|
56
|
+
|
57
|
+
#-------------------------------------------------------------------------------------
|
58
|
+
#
|
59
|
+
#-------------------------------------------------------------------------------------
|
60
|
+
|
61
|
+
should "parse a csv file to map without filters nor mappings in chunks" do
|
62
|
+
|
63
|
+
# type is :map. Rows are hashes. No filters or mappings are set in this test, so
|
64
|
+
# every field is returned as a string.
|
65
|
+
reader = Jcsv.reader("../data/customer.csv", format: :map, chunk_size: 2,
|
66
|
+
strings_as_keys: true)
|
67
|
+
|
68
|
+
map = reader.read
|
69
|
+
|
70
|
+
# since chunk_size = 2, but we didn't pass a block to reader, we will get back
|
71
|
+
# 1 array, with 2 arrays each with a chunk. Every element of the internal arrays
|
72
|
+
# is a map (hash)
|
73
|
+
|
74
|
+
# Below we are looking at the second chunk, element 0. Since our chunks are of
|
75
|
+
# size 2, the second chunk, element 0 is the third row.
|
76
|
+
assert_equal("2255887799", map[1][0]["loyaltyPoints"])
|
77
|
+
|
78
|
+
end
|
79
|
+
|
80
|
+
#-------------------------------------------------------------------------------------
|
81
|
+
#
|
82
|
+
#-------------------------------------------------------------------------------------
|
83
|
+
|
84
|
+
should "parse a csv file to map" do
|
85
|
+
|
86
|
+
# type is :map. Rows are hashes. Set the default filter to not_nil. That is, all
|
87
|
+
# fields are required unless explicitly set to optional.
|
88
|
+
reader = Jcsv.reader("../data/customer.csv", format: :map, default_filter: Jcsv.not_nil)
|
89
|
+
|
90
|
+
# Set numberOfKids and married as optional, otherwise an exception will be raised
|
91
|
+
reader.filters = {:number_of_kids => Jcsv.optional >> Jcsv.int,
|
92
|
+
:married => Jcsv.optional >> Jcsv.bool,
|
93
|
+
:loyalty_points => Jcsv.long,
|
94
|
+
:customer_no => Jcsv.int,
|
95
|
+
:birth_date => Jcsv.date("dd/MM/yyyy")}
|
96
|
+
|
97
|
+
# When parsing to map, it is possible to make a mapping. If a column is mapped to false
|
98
|
+
# the column will be removed from the returned row
|
99
|
+
reader.mapping = {:number_of_kids => :numero_criancas,
|
100
|
+
:married => "casado",
|
101
|
+
:loyalty_points => "pontos fidelidade",
|
102
|
+
:customer_no => false}
|
103
|
+
|
104
|
+
reader.read do |line_no, row_no, row, headers|
|
105
|
+
if (row_no == 5)
|
106
|
+
assert_equal(nil, row[:customer_no])
|
107
|
+
assert_equal("Bill", row[:first_name])
|
108
|
+
assert_equal(true, row["casado"])
|
109
|
+
assert_equal("1973-07-10T00:00:00+00:00", row[:birth_date].to_s)
|
110
|
+
assert_equal("2701 San Tomas Expressway\nSanta Clara, CA 95050\nUnited States",
|
111
|
+
row[:mailing_address])
|
112
|
+
assert_equal(3, row[:numero_criancas])
|
113
|
+
end
|
114
|
+
|
115
|
+
end
|
116
|
+
|
117
|
+
end
|
118
|
+
|
119
|
+
#-------------------------------------------------------------------------------------
|
120
|
+
#
|
121
|
+
#-------------------------------------------------------------------------------------
|
122
|
+
|
123
|
+
should "raise exception if no header when reading map" do
|
124
|
+
|
125
|
+
# Will raise an exception as reading a file as map requires the header
|
126
|
+
assert_raise ( Jcsv::MissingHeadersError ) {
|
127
|
+
Jcsv.reader("../data/customer.csv", format: :map, headers: false)
|
128
|
+
}
|
129
|
+
|
130
|
+
end
|
131
|
+
|
132
|
+
#-------------------------------------------------------------------------------------
|
133
|
+
#
|
134
|
+
#-------------------------------------------------------------------------------------
|
135
|
+
|
136
|
+
should "raise ConstraintViolation" do
|
137
|
+
|
138
|
+
reader = Jcsv.reader("../data/customer.csv", format: :map, default_filter: Jcsv.not_nil,
|
139
|
+
headers: true, strings_as_keys: true)
|
140
|
+
|
141
|
+
# Set only numberOfKids as optional; married is not listed, so it keeps the not_nil default filter
|
142
|
+
reader.filters = {"numberOfKids" => Jcsv.optional >> Jcsv.int,
|
143
|
+
"loyaltyPoints" => Jcsv.long,
|
144
|
+
"customerNo" => Jcsv.int,
|
145
|
+
"birthDate" => Jcsv.date("dd/mm/yyyy")}
|
146
|
+
|
147
|
+
# reader.read { |line_no, row_no, row, headers| }
|
148
|
+
# Will raise an exception, as the default_filter is not_nil and there is a record
|
149
|
+
# in which field 'married' is nil
|
150
|
+
assert_raise ( Jcsv::ConstraintViolation ) {
|
151
|
+
reader.read { |line_no, row_no, row, headers| } }
|
152
|
+
|
153
|
+
end
|
154
|
+
|
155
|
+
#-------------------------------------------------------------------------------------
|
156
|
+
#
|
157
|
+
#-------------------------------------------------------------------------------------
|
158
|
+
|
159
|
+
should "catch FilterError" do
|
160
|
+
|
161
|
+
reader = Jcsv.reader("../data/customer.csv", format: :map, headers: true,
|
162
|
+
strings_as_keys: true)
|
163
|
+
# numberOfKids is optional; loyaltyPoints gets a bool filter, presumably so that non-boolean values raise a FilterError
|
164
|
+
reader.filters = {"numberOfKids" => Jcsv.optional >> Jcsv.int,
|
165
|
+
"loyaltyPoints" => Jcsv.bool,
|
166
|
+
"customerNo" => Jcsv.int,
|
167
|
+
"birthDate" => Jcsv.date("dd/mm/yyyy")}
|
168
|
+
|
169
|
+
begin
|
170
|
+
reader.read do |line_no, row_no, row, headers|
|
171
|
+
p row
|
172
|
+
end
|
173
|
+
rescue Jcsv::FilterError => e
|
174
|
+
puts e.message
|
175
|
+
retry
|
176
|
+
end
|
177
|
+
|
178
|
+
end
|
179
|
+
|
180
|
+
#-------------------------------------------------------------------------------------
|
181
|
+
#
|
182
|
+
#-------------------------------------------------------------------------------------
|
183
|
+
|
184
|
+
should "Read file in chunks passing a block as iterator" do
|
185
|
+
|
186
|
+
# Read chunks of the file. In this case, we are breaking the file in chunks of two
|
187
|
+
reader = Jcsv.reader("../data/customer.csv", chunk_size: 2, format: :map,
|
188
|
+
strings_as_keys: true)
|
189
|
+
|
190
|
+
# Add filters, so that we get 'objects' instead of strings for filtered fields
|
191
|
+
reader.filters = {"numberOfKids" => Jcsv.optional >> Jcsv.int,
|
192
|
+
"married" => Jcsv.optional >> Jcsv.bool,
|
193
|
+
"customerNo" => Jcsv.int}
|
194
|
+
|
195
|
+
iter = reader.each
|
196
|
+
chunk1 = iter.next
|
197
|
+
# 3rd item in the chunk1 array is the data. 1st item is the line_no and 2nd item
|
198
|
+
# row_no. Chunks are of size 2, so chunk1[2][1] is the second element of the first
|
199
|
+
# chunk
|
200
|
+
assert_equal(2, chunk1[2][1]["customerNo"])
|
201
|
+
assert_equal("Down", chunk1[2][1]["lastName"])
|
202
|
+
|
203
|
+
chunk2 = iter.next
|
204
|
+
|
205
|
+
end
|
206
|
+
|
207
|
+
end
|
208
|
+
|
209
|
+
end
|