mdarray-jcsv 0.6.3-java

Sign up to get free protection for your applications and to get access to all the features.
Files changed (44) hide show
  1. checksums.yaml +7 -0
  2. data/LICENSE.txt +23 -0
  3. data/README.md +2 -0
  4. data/Rakefile +46 -0
  5. data/config.rb +104 -0
  6. data/lib/constraints.rb +205 -0
  7. data/lib/date_filters.rb +252 -0
  8. data/lib/dimensions.rb +276 -0
  9. data/lib/filters.rb +332 -0
  10. data/lib/jcsv.rb +107 -0
  11. data/lib/list_reader.rb +200 -0
  12. data/lib/locale.rb +192 -0
  13. data/lib/map_reader.rb +192 -0
  14. data/lib/mdarray-jcsv.rb +24 -0
  15. data/lib/mdarray_reader.rb +110 -0
  16. data/lib/numeric_filters.rb +225 -0
  17. data/lib/reader.rb +547 -0
  18. data/lib/supercsv_interface.rb +231 -0
  19. data/test/test_complete.rb +37 -0
  20. data/test/test_critbit.rb +442 -0
  21. data/test/test_customer_list.rb +436 -0
  22. data/test/test_customer_map.rb +209 -0
  23. data/test/test_customer_nhlist.rb +161 -0
  24. data/test/test_deep_map.rb +264 -0
  25. data/test/test_del.rb +73 -0
  26. data/test/test_dimensions.rb +231 -0
  27. data/test/test_example.rb +79 -0
  28. data/test/test_filters.rb +374 -0
  29. data/test/test_list_dimensions.rb +110 -0
  30. data/test/test_mdarray.rb +227 -0
  31. data/test/test_missing_data.rb +57 -0
  32. data/vendor/commons-beanutils-1.8.3.jar +0 -0
  33. data/vendor/commons-lang3-3.1.jar +0 -0
  34. data/vendor/dozer-5.4.0.jar +0 -0
  35. data/vendor/jcl-over-slf4j-1.6.6.jar +0 -0
  36. data/vendor/joda-time-2.7.jar +0 -0
  37. data/vendor/slf4j-api-1.7.5.jar +0 -0
  38. data/vendor/snakeyaml-1.14.jar +0 -0
  39. data/vendor/super-csv-2.4.0.jar +0 -0
  40. data/vendor/super-csv-dozer-2.4.0.jar +0 -0
  41. data/vendor/super-csv-java8-2.4.0.jar +0 -0
  42. data/vendor/super-csv-joda-2.4.0.jar +0 -0
  43. data/version.rb +2 -0
  44. metadata +196 -0
@@ -0,0 +1,231 @@
1
+ # -*- coding: utf-8 -*-
2
+
3
+ ##########################################################################################
4
+ # @author Rodrigo Botafogo
5
+ #
6
+ # Copyright © 2015 Rodrigo Botafogo. All Rights Reserved. Permission to use, copy, modify,
7
+ # and distribute this software and its documentation, without fee and without a signed
8
+ # licensing agreement, is hereby granted, provided that the above copyright notice, this
9
+ # paragraph and the following two paragraphs appear in all copies, modifications, and
10
+ # distributions.
11
+ #
12
+ # IN NO EVENT SHALL RODRIGO BOTAFOGO BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL,
13
+ # INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF
14
+ # THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF RODRIGO BOTAFOGO HAS BEEN ADVISED OF THE
15
+ # POSSIBILITY OF SUCH DAMAGE.
16
+ #
17
+ # RODRIGO BOTAFOGO SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
18
+ # THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE
19
+ # SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED HEREUNDER IS PROVIDED "AS IS".
20
+ # RODRIGO BOTAFOGO HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS,
21
+ # OR MODIFICATIONS.
22
+ ##########################################################################################
23
+
24
+ require_relative 'dimensions'
25
+
26
class Jcsv
  include_package "org.supercsv.cellprocessor.ift"

  #========================================================================================
  # Mapping contains a mapping from column names to:
  # * other column names: when we want to change the name of the column
  # * false: when we want to remove the column from reading
  # * true: when the column is a dimensions
  # If there is no mapping then the column number maps to itself
  #========================================================================================

  class Mapping

    attr_accessor :mapping

    # Starts with no mapping at all: every index maps to itself until a
    # mapping Hash/Array is assigned via the +mapping+ accessor.
    def initialize
      @mapping = nil
    end

    # Returns the mapped value for +index+, or +index+ itself when no
    # mapping has been assigned (identity mapping).
    def [](index)
      # p "#{@mapping}, #{index}"
      (@mapping.nil?)? index : @mapping[index]
    end

    # Stores +value+ under +index+ in the mapping.
    # NOTE(review): assumes @mapping has already been assigned; calling this
    # while @mapping is still nil raises NoMethodError — confirm callers
    # always set +mapping+ first.
    def []=(index, value)
      @mapping[index] = value
    end

  end

  #========================================================================================
  # Module Processors interfaces the Ruby code with the SuperCsv cell processors.
  # It is mixed into classes that extend SuperCSV's Java readers, so methods such
  # as getHeader, getColumns, getLineNumber and getRowNumber are inherited from
  # the Java side (presumably org.supercsv.io.AbstractCsvReader — confirm).
  #========================================================================================

  module Processors
    include_package "org.supercsv.util"
    include_package "org.supercsv.exception"

    # Hash-like of dimension name -> dimension, supplied by the including class.
    attr_reader :dimensions
    # Array of the current row's dimension values, rebuilt on every
    # executeProcessors call.
    attr_reader :key_array

    #---------------------------------------------------------------------------------------
    # Returns the CSV header row as a Ruby Array, memoized after the first call.
    # getHeader(true) is the SuperCSV Java method (true = first line is header).
    #---------------------------------------------------------------------------------------

    def headers
      @headers ||= getHeader(true).to_a
    end

    #---------------------------------------------------------------------------------------
    # This method uses variable @processed_columns that should be initialized in the class
    # that includes this module. In the case of a list_reader for instance, processed_columns
    # is initalized as an Array. For map_reader, processed_columns is initalized as a
    # Hash. So, processed_columns is a data structure for storing the data processed. The
    # mapping defines where the data should be stored in this data structure. In the case
    # of list_reader, mapping[i] = i, for map_reader, mapping[i] = <mapping name for hash>
    #---------------------------------------------------------------------------------------

    def executeProcessors(processors)

      # Raw column values of the current row, from the Java reader.
      source = getColumns()

      context = CsvContext.new(getLineNumber(), getRowNumber(), 1);
      context.setRowSource(source);

      # raise "The number of columns to be processed #{source.size} must match the number of
      # CellProcessors #{processors.length}" if (source.size != processors.length)

      @key_array = Array.new

      source.each_with_index do |s, i|
        begin
          # is @column_mapping[i] ever nil? I don't think so... CHECK!!!
          # false/nil mapping: column is dropped from the output entirely.
          next if ((@column_mapping[i] == false) || (@column_mapping[i].nil?))
          # if column mapping is 'true', then this column is a dimension and the data in this
          # column is part of the key
          if (@column_mapping[i] == true)
            begin
              @dimensions[@headers[i]] = s
            rescue RuntimeError => e
              # Dimension rejected the value (e.g. out-of-order label); warn
              # and keep going unless warnings are suppressed.
              puts "Warning reading row: #{source.toString()} in field '#{@headers[i]}'. " +
                e.message if !@suppress_warnings
              # raise "Error reading row: #{source.toString()} in field '#{@headers[i]}'. " +
              # e.message
            end
            # Place the value at the dimension's declared position in the key.
            @key_array[@dimensions.dimensions_names.index(@headers[i])] = s
            next
          end

          # SuperCSV column numbers are 1-based.
          context.setColumnNumber(i + 1)
          if (i >= processors.size)
            # No processor declared for this column: store the raw string.
            @processed_columns[@column_mapping[i]] = s
          else
            if (processors[i] == nil)
              @processed_columns[@column_mapping[i]] = s
            else
              # Run the SuperCSV cell processor chain for this column.
              cell = processors[i].execute(s, context)
              # cell = (cell.is_a? Jcsv::Pack)? cell.ruby_obj : cell
              @processed_columns[@column_mapping[i]] = cell
            end
          end
        rescue SuperCsvConstraintViolationException => e
          # NOTE(review): "ContraintViolation" is missing an 's' — presumably it
          # matches the class name as defined elsewhere in this gem (jcsv.rb);
          # verify the spelling there before renaming either side.
          raise Jcsv::ContraintViolation.new("Constraint violation: #{context.toString}")
        end

      end

      @processed_columns

    end

  end

  #========================================================================================
  # Class CLR (CSV List Reader) wraps java CsvListReader.
  #========================================================================================

  class CLR < org.supercsv.io.CsvListReader
    include_package "org.supercsv.cellprocessor.ift"
    include Processors

    #---------------------------------------------------------------------------------------
    # NOTE(review): an optional parameter (dimensions = nil) precedes a required
    # one (suppress_warnings). This is legal Ruby, but the default is only used
    # on 3-argument calls; confirm call sites pass all four arguments.
    #---------------------------------------------------------------------------------------

    def initialize(filereader, preferences, dimensions = nil, suppress_warnings)
      @dimensions = dimensions
      @suppress_warnings = suppress_warnings
      super(filereader, preferences)
    end

    #---------------------------------------------------------------------------------------
    # Reads one row as an Array via the Java CsvListReader, applying +filters+
    # (a Hash of cell processors) unless filters == false. When dimensions are
    # in use, the row's key array is prepended to the returned data.
    # Returns nil at end of input (Java super returns null).
    #---------------------------------------------------------------------------------------

    def read(column_mapping, filters)

      # initialize @processed_columns to a new Array. This will be used by method
      # executeProcessor from module Processors. @column_mapping also needs to be initialized
      # to the column_mapping received. Used by methods in module Processors
      @processed_columns = Array.new
      @column_mapping = column_mapping

      data_read = (filters == false)? super([].to_java(CellProcessor)) :
        super(filters.values.to_java(CellProcessor))
      data_read.unshift(@key_array) if dimensions && data_read
      data_read
    end

  end

  #========================================================================================
  # class CMR (CSV Map Reader) wraps class CsvMapReader
  #========================================================================================

  class CMR < org.supercsv.io.CsvMapReader
    include_package "org.supercsv.cellprocessor.ift"
    include Processors

    # When dimensions are defined, then the composition of all dimensions is the 'key'
    # attr_reader :key

    #---------------------------------------------------------------------------------------
    # NOTE(review): same optional-before-required parameter list as CLR#initialize;
    # the dimensions default is only reachable on 3-argument calls.
    #---------------------------------------------------------------------------------------

    def initialize(filereader, preferences, dimensions = nil, suppress_warnings)
      @dimensions = dimensions
      @suppress_warnings = suppress_warnings
      super(filereader, preferences)
    end

    #---------------------------------------------------------------------------------------
    # Reads one row as a Hash. With filters == false the Java CsvMapReader super
    # is called directly with the splatted name mapping; otherwise filter_input
    # runs the cell processors row by row.
    #---------------------------------------------------------------------------------------

    def read(column_mapping, filters)

      # initialize @processed_columns to a new Hash. This will be used by method
      # executeProcessor from module Processors
      @processed_columns = Hash.new
      @column_mapping = column_mapping

      (filters == false)? super(*column_mapping.mapping) :
        filter_input(column_mapping, filters.values.to_java(CellProcessor))

    end

    #---------------------------------------------------------------------------------------
    # Advances the underlying Java reader one row (readRow) and, if a row was
    # read, runs the cell processors and attaches the dimension key under :key.
    # Returns nil at end of input (readRow false falls through the if).
    #---------------------------------------------------------------------------------------

    def filter_input(name_mapping, processors)

      if (readRow())
        processed_columns = executeProcessors(processors)
        processed_columns[:key] = @key_array if dimensions
        return processed_columns
      end

    end

  end

end
231
+
@@ -0,0 +1,37 @@
1
+ # -*- coding: utf-8 -*-
2
+
3
+ ##########################################################################################
4
+ # Copyright © 2013 Rodrigo Botafogo. All Rights Reserved. Permission to use, copy, modify,
5
+ # and distribute this software and its documentation, without fee and without a signed
6
+ # licensing agreement, is hereby granted, provided that the above copyright notice, this
7
+ # paragraph and the following two paragraphs appear in all copies, modifications, and
8
+ # distributions.
9
+ #
10
+ # IN NO EVENT SHALL RODRIGO BOTAFOGO BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL,
11
+ # INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF
12
+ # THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF RODRIGO BOTAFOGO HAS BEEN ADVISED OF THE
13
+ # POSSIBILITY OF SUCH DAMAGE.
14
+ #
15
+ # RODRIGO BOTAFOGO SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
16
+ # THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE
17
+ # SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED HEREUNDER IS PROVIDED "AS IS".
18
+ # RODRIGO BOTAFOGO HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS,
19
+ # OR MODIFICATIONS.
20
+ ##########################################################################################
21
+
22
# Test-suite entry point: loading this file pulls in every individual test
# file so the whole mdarray-jcsv suite runs under a single test-unit process.
require 'rubygems'
require "test/unit"
require 'shoulda'

require_relative '../config'
require 'jcsv'

# Each require below registers its Test::Unit::TestCase subclasses with the
# runner; test-unit auto-runs them at process exit.
require_relative 'test_customer_list'
require_relative 'test_customer_map'
require_relative 'test_dimensions'
require_relative 'test_deep_map'
require_relative 'test_critbit'
require_relative 'test_filters'
require_relative 'test_list_dimensions'
require_relative 'test_customer_nhlist'
require_relative 'test_mdarray'
@@ -0,0 +1,442 @@
1
+ # -*- coding: utf-8 -*-
2
+
3
+ ##########################################################################################
4
+ # Copyright © 2015 Rodrigo Botafogo. All Rights Reserved. Permission to use, copy, modify,
5
+ # and distribute this software and its documentation for educational, research, and
6
+ # not-for-profit purposes, without fee and without a signed licensing agreement, is hereby
7
+ # granted, provided that the above copyright notice, this paragraph and the following two
8
+ # paragraphs appear in all copies, modifications, and distributions. Contact Rodrigo
9
+ # Botafogo - rodrigo.a.botafogo@gmail.com for commercial licensing opportunities.
10
+ #
11
+ # IN NO EVENT SHALL RODRIGO BOTAFOGO BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL,
12
+ # INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF
13
+ # THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF RODRIGO BOTAFOGO HAS BEEN ADVISED OF THE
14
+ # POSSIBILITY OF SUCH DAMAGE.
15
+ #
16
+ # RODRIGO BOTAFOGO SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
17
+ # THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE
18
+ # SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED HEREUNDER IS PROVIDED "AS IS".
19
+ # RODRIGO BOTAFOGO HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS,
20
+ # OR MODIFICATIONS.
21
+ ##########################################################################################
22
+
23
+ require 'rubygems'
24
+ require 'test/unit'
25
+ require 'shoulda'
26
+ require 'matrix'
27
+
28
+ require 'pp'
29
+ require_relative '../config'
30
+
31
+ require 'jcsv'
32
+
33
# Tests reading multi-dimensional CSV data into critbit (crit-bit trie) maps,
# exercising flat keys ("dim1.dim2.dim3"), deep maps, chunked reads,
# enumerators, filters and error/warning behavior.
# Fixes over the original: assert_raise(Klass) written without the
# space-before-parentheses warning; assert_nil used instead of
# assert_equal(nil, ...); typos in descriptions/comments corrected.
class CSVTest < Test::Unit::TestCase

  context "CSV test" do

    setup do

    end

    #-------------------------------------------------------------------------------------
    # When reading the CSV file in one big chunk and selecting deep_map: true, then each
    # dimension will be hashed across all rows. [This is not clear at all!!! IMPROVE.]
    #-------------------------------------------------------------------------------------

    should "parse multi-dimension csv into a critbit, alphabetical order" do

      reader = Jcsv.reader("../data/customer.csv", format: :critbit,
                           dimensions: [:last_name, :first_name])

      customers = reader.read
      # critbit keeps keys in alphabetical order, so ordering is deterministic
      assert_equal("Down.Bob", customers.keys[0])
      assert_equal("Dunbar.John", customers.keys[1])

      reader = Jcsv.reader("../data/customer.csv", format: :critbit,
                           dimensions: [:first_name, :last_name])

      customers = reader.read
      assert_equal("Alice.Wunderland", customers.keys[0])
      assert_equal("Bill.Jobs", customers.keys[1])

    end

    #-------------------------------------------------------------------------------------
    # Read data into a flat map. Allows random access to the data by use of the map
    # 'key'. The 'key' is a string that concatenates the values of the dimensions's
    # labels with a '.'.
    #-------------------------------------------------------------------------------------

    should "read data into flat critbit" do

      reader = Jcsv.reader("../data/epilepsy.csv", format: :critbit,
                           dimensions: [:treatment, :subject, :period],
                           default_filter: Jcsv.int)

      # remove the :patient field from the data, as this field is already given by the
      # :subject field.
      reader.mapping = {:patient => false}

      # read all the data into a flat map (hash) with keys the dimensions values
      # concatenated with '.'.
      treatment = reader.read
      # p treatment

      assert_equal(11, treatment["placebo.1.1"][:base])
      assert_equal(31, treatment["placebo.1.1"][:age])
      assert_equal(5, treatment["placebo.1.1"][:"seizure.rate"])

      assert_equal(31, treatment["Progabide.35.2"][:base])
      assert_equal(30, treatment["Progabide.35.2"][:age])
      assert_equal(17, treatment["Progabide.35.2"][:"seizure.rate"])

    end

    #-------------------------------------------------------------------------------------
    # Read data into a flat map in chunks
    #-------------------------------------------------------------------------------------

    should "read data into flat critbit in chunks" do

      # parameter deep_map: is not passed. By default it is false
      reader = Jcsv.reader("../data/epilepsy.csv", format: :critbit, chunk_size: 20,
                           dimensions: [:treatment, :subject, :period],
                           default_filter: Jcsv.int)

      # remove the :patient field from the data, as this field is already given by the
      # :subject field.
      reader.mapping = {:patient => false}
      treatment = reader.read
      # p treatment

      treatment_type = reader.dimensions[:treatment]
      subject = reader.dimensions[:subject]
      period = reader.dimensions[:period]

      # variable labels has all dimension labels
      assert_equal(0, treatment_type.labels["placebo"])
      assert_equal(1, treatment_type.labels["Progabide"])
      assert_equal(1, subject.labels["2"])
      assert_equal(13, subject.labels["14"])
      assert_equal(58, subject.labels["59"])
      assert_equal(0, period.labels["1"])
      assert_equal(3, period.labels["4"])

      # we now need to access the first chunk [0] to get to the desired element
      assert_equal(11, treatment[0]["placebo.1.1"][:base])
      assert_equal(31, treatment[0]["placebo.1.1"][:age])
      assert_equal(5, treatment[0]["placebo.1.1"][:"seizure.rate"])

      # chunk [0] does not have key "Progabide.35.2"
      assert_nil(treatment[0]["Progabide.35.2"])

      assert_equal(10, treatment[6]["Progabide.32.3"][:base])
      assert_equal(30, treatment[6]["Progabide.32.3"][:age])
      assert_equal(1, treatment[6]["Progabide.32.3"][:"seizure.rate"])

    end

    #-------------------------------------------------------------------------------------
    #
    #-------------------------------------------------------------------------------------

    should "read to critbit in enumerable chunks" do

      # parameter deep_map: is not passed. By default it is false
      reader = Jcsv.reader("../data/epilepsy.csv", format: :critbit, chunk_size: 20,
                           dimensions: [:treatment, :subject, :period],
                           default_filter: Jcsv.int)

      # Method each without a block returns an enumerator
      enum = reader.each

      # read the first chunk. Chunk is of size 20
      chunk = enum.next
      data = chunk[2]

      # in this case, only the first 20 rows were read, so only one treatment and six
      # subjects were read until this point
      assert_equal(1, reader.dimensions[:treatment].size)
      # assert_equal(6, reader.dimensions[:subject].size)

      assert_equal(8, data["placebo.4.4"][:base])
      assert_equal(36, data["placebo.4.4"][:age])
      assert_equal(4, data["placebo.4.4"][:"seizure.rate"])

      # read the next chunk. Chunk is of size 20
      chunk = enum.next

      # read the next chunk... not interested in the second chunk for some reason...
      chunk = enum.next
      data = chunk[2]

      # As we read new chunks of data, the dimensions labels accumulate, i.e., they are
      # not erased between reads of every chunk (call to the next function). Dimensions
      # are variables from the reader and not the chunk.
      assert_equal(1, reader.dimensions[:treatment].size)
      assert_equal(16, reader.dimensions[:subject].size)

      assert_equal(33, data["placebo.12.2"][:base])
      assert_equal(24, data["placebo.12.2"][:age])
      assert_equal(6, data["placebo.12.2"][:"seizure.rate"])

    end

    #-------------------------------------------------------------------------------------
    #
    #-------------------------------------------------------------------------------------

    should "read to critbit and pass to block with dimensions" do

      reader = Jcsv.reader("../data/epilepsy.csv", format: :critbit,
                           dimensions: [:treatment, :subject, :period],
                           default_filter: Jcsv.int)

      # with the default chunk_size of 1, each yielded row holds a single key
      reader.read do |line_no, row_no, row|
        assert_equal(1, row.keys.size)
      end

    end

    #-------------------------------------------------------------------------------------
    #
    #-------------------------------------------------------------------------------------

    should "read to critbit and pass to block with dimensions, chunk_size > 1" do

      reader = Jcsv.reader("../data/epilepsy.csv", format: :critbit, chunk_size: 20,
                           dimensions: [:treatment, :subject, :period],
                           default_filter: Jcsv.int)

      # every full chunk carries 20 keys; only the last partial chunk differs
      reader.read do |line_no, row_no, row|
        assert_equal(20, row.keys.size) if line_no < 230
      end

    end

    #-------------------------------------------------------------------------------------
    #
    #-------------------------------------------------------------------------------------

    should "raise error if mapping a column to true in critbit" do

      reader = Jcsv.reader("../data/epilepsy.csv", format: :critbit, chunk_size: :all,
                           dimensions: [:subject, :period],
                           default_filter: Jcsv.int)

      # Raises an error, since mapping to true is not defined
      assert_raise(ArgumentError) { reader.mapping =
        {:treatment => false, :patient => true} }

    end

    #-------------------------------------------------------------------------------------
    # When reading the CSV file in one big chunk and selecting deep_map: true, then each
    # dimension will be hashed across all rows.
    #-------------------------------------------------------------------------------------

    should "parse multi-dimension csv file to critbit, chunk_size all and deep_map true" do

      reader = Jcsv.reader("../data/epilepsy.csv", format: :critbit, chunk_size: :all,
                           dimensions: [:treatment, :subject, :period], deep_map: true)

      # remove the :patient field from the data, as this field is already given by the
      # :subject field.
      reader.mapping = {:patient => false}

      # since we are reading with chunk_size = :all, then we will only get one chunk back.
      # Then we can get the first chunk by indexing read with 0: reader.read[0]
      treatment = reader.read[0]
      # p treatment

      # get the dimensions
      treatment_type = reader.dimensions[:treatment]
      subject = reader.dimensions[:subject]
      period = reader.dimensions[:period]

      # variable labels has all dimension labels
      assert_equal(0, treatment_type.labels["placebo"])
      assert_equal(1, treatment_type.labels["Progabide"])
      assert_equal(1, subject.labels["2"])
      assert_equal(13, subject.labels["14"])
      assert_equal(58, subject.labels["59"])
      assert_equal(0, period.labels["1"])
      assert_equal(3, period.labels["4"])

      # no filters were given, so values stay as strings
      assert_equal("14", treatment["placebo"]["10"]["1"][:"seizure.rate"])

    end

    #-------------------------------------------------------------------------------------
    #
    #-------------------------------------------------------------------------------------

    should "read data with dimensions, mapping and filters into a critbit" do

      reader = Jcsv.reader("../data/epilepsy.csv", format: :critbit, chunk_size: :all,
                           dimensions: [:treatment, :subject, :period], deep_map: true,
                           default_filter: Jcsv.int)

      # remove the :patient field from the data, as this field is already given by the
      # :subject field.
      reader.mapping = {:patient => false}
      reader.filters = {:"seizure.rate" => Jcsv.float}

      treatment = reader.read[0]
      # p treatment

      assert_equal(14.0, treatment["placebo"]["10"]["1"][:"seizure.rate"])
      assert_equal(19.0, treatment["Progabide"]["45"]["1"][:"seizure.rate"])

    end

    #-------------------------------------------------------------------------------------
    #
    #-------------------------------------------------------------------------------------

    should "read data with deep_map in critbit but chunk_size not all" do

      reader = Jcsv.reader("../data/epilepsy.csv", format: :critbit, chunk_size: 20,
                           dimensions: [:treatment, :subject, :period], deep_map: true,
                           default_filter: Jcsv.int)

      # remove the :patient field from the data, as this field is already given by the
      # :subject field.
      reader.mapping = {:patient => false}
      reader.filters = {:"seizure.rate" => Jcsv.float}

      treatment = reader.read

      assert_equal(3.0, treatment[0]["placebo"]["2"]["1"][:"seizure.rate"])
      # since only 20 rows read per chunk, there is no Progabide row yet. Note that there
      # was data in the test above
      assert_nil(treatment[0]["Progabide"])

      # chunk 10, has Progabide as a dimension
      assert_equal(6.0, treatment[10]["Progabide"]["51"]["2"][:"seizure.rate"])

    end

    #-------------------------------------------------------------------------------------
    #
    #-------------------------------------------------------------------------------------

    should "raise exception if key is repeated in critbit" do

      reader = Jcsv.reader("../data/epilepsy.csv", format: :critbit, chunk_size: :all,
                           dimensions: [:period], deep_map: true)

      # will raise an exception as :period is not a key. Will break as soon as we read the
      # first period for the second user
      assert_raise(Jcsv::DuplicateKeyError) { reader.read[0] }

    end

    #-------------------------------------------------------------------------------------
    # When reading the CSV file in one big chunk and selecting deep_map: true, then each
    # dimension will be hashed across all rows. [This is not clear at all!!! IMPROVE.]
    #-------------------------------------------------------------------------------------

    should "Show errors when dimensions are not in order or missing in critbit" do

      reader = Jcsv.reader("../data/epilepsy.csv", format: :critbit, chunk_size: :all,
                           dimensions: [:period, :treatment, :subject], deep_map: true)

      p "LOTS OF ERROR MESSAGES EXPECTED FROM HERE..."

      # remove the :patient field from the data, as this field is already given by the
      # :subject field.
      reader.mapping = {:patient => false}

      # since we are reading with chunk_size = :all, then we will only get one chunk back.
      # Then we can get the first chunk by indexing read with 0: reader.read[0]
      treatment = reader.read[0]

      p "... TO HERE. If no error messages, then something is wrong!"

    end

    #-------------------------------------------------------------------------------------
    # When reading the CSV file in one big chunk and selecting deep_map: true, then each
    # dimension will be hashed across all rows. [This is not clear at all!!! IMPROVE.]
    #-------------------------------------------------------------------------------------

    should "Suppress warnings when dimensions are not in order or missing in critbit" do

      reader = Jcsv.reader("../data/epilepsy.csv", format: :critbit, chunk_size: :all,
                           dimensions: [:period, :treatment, :subject], deep_map: true,
                           suppress_warnings: true)

      p "No warning messages should be seen from here..."

      # remove the :patient field from the data, as this field is already given by the
      # :subject field.
      reader.mapping = {:patient => false}

      # since we are reading with chunk_size = :all, then we will only get one chunk back.
      # Then we can get the first chunk by indexing read with 0: reader.read[0]
      treatment = reader.read
      # p treatment

      p "... to here. If there are any warning messages then there is something wrong!"

    end

    #-------------------------------------------------------------------------------------
    # There is a large difference when parsing multidimensional CSV files with chunks and
    # no chunks. When no chunks are selected, this is identical to normal dimension
    # reading.
    #-------------------------------------------------------------------------------------

    should "parse multi-dimension csv file to critbit no chunk" do

      reader = Jcsv.reader("../data/epilepsy.csv", format: :critbit,
                           dimensions: [:treatment, :subject, :period], deep_map: true)

      # remove the :patient field from the data, as this field is already given by the
      # :subject field.
      reader.mapping = {:patient => false}

      treatment = reader.read
      # p treatment

      # no filters were given, so all values remain strings
      assert_equal("11", treatment["placebo.1.1"][:base])
      assert_equal("31", treatment["placebo.1.1"][:age])
      assert_equal("5", treatment["placebo.1.1"][:"seizure.rate"])

      assert_equal("11", treatment["placebo.1.2"][:base])
      assert_equal("31", treatment["placebo.1.2"][:age])
      assert_equal("3", treatment["placebo.1.2"][:"seizure.rate"])

    end

    #-------------------------------------------------------------------------------------
    # All examples until now had chunk_size :all, but they can have smaller size. In this
    # example, chunk_size is 20 and it is processed by a block
    #-------------------------------------------------------------------------------------

    should "read with dimension and given a block in critbit" do

      reader = Jcsv.reader("../data/epilepsy.csv", format: :critbit, chunk_size: 20,
                           dimensions: [:treatment, :subject, :period], deep_map: true,
                           default_filter: Jcsv.int)

      reader.mapping = {:patient => false}

      reader.read do |line_no, row_no, chunk|
        p line_no
        p row_no
        p chunk
      end

    end

  end

end