mdarray-jcsv 0.6.3-java

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44) hide show
  1. checksums.yaml +7 -0
  2. data/LICENSE.txt +23 -0
  3. data/README.md +2 -0
  4. data/Rakefile +46 -0
  5. data/config.rb +104 -0
  6. data/lib/constraints.rb +205 -0
  7. data/lib/date_filters.rb +252 -0
  8. data/lib/dimensions.rb +276 -0
  9. data/lib/filters.rb +332 -0
  10. data/lib/jcsv.rb +107 -0
  11. data/lib/list_reader.rb +200 -0
  12. data/lib/locale.rb +192 -0
  13. data/lib/map_reader.rb +192 -0
  14. data/lib/mdarray-jcsv.rb +24 -0
  15. data/lib/mdarray_reader.rb +110 -0
  16. data/lib/numeric_filters.rb +225 -0
  17. data/lib/reader.rb +547 -0
  18. data/lib/supercsv_interface.rb +231 -0
  19. data/test/test_complete.rb +37 -0
  20. data/test/test_critbit.rb +442 -0
  21. data/test/test_customer_list.rb +436 -0
  22. data/test/test_customer_map.rb +209 -0
  23. data/test/test_customer_nhlist.rb +161 -0
  24. data/test/test_deep_map.rb +264 -0
  25. data/test/test_del.rb +73 -0
  26. data/test/test_dimensions.rb +231 -0
  27. data/test/test_example.rb +79 -0
  28. data/test/test_filters.rb +374 -0
  29. data/test/test_list_dimensions.rb +110 -0
  30. data/test/test_mdarray.rb +227 -0
  31. data/test/test_missing_data.rb +57 -0
  32. data/vendor/commons-beanutils-1.8.3.jar +0 -0
  33. data/vendor/commons-lang3-3.1.jar +0 -0
  34. data/vendor/dozer-5.4.0.jar +0 -0
  35. data/vendor/jcl-over-slf4j-1.6.6.jar +0 -0
  36. data/vendor/joda-time-2.7.jar +0 -0
  37. data/vendor/slf4j-api-1.7.5.jar +0 -0
  38. data/vendor/snakeyaml-1.14.jar +0 -0
  39. data/vendor/super-csv-2.4.0.jar +0 -0
  40. data/vendor/super-csv-dozer-2.4.0.jar +0 -0
  41. data/vendor/super-csv-java8-2.4.0.jar +0 -0
  42. data/vendor/super-csv-joda-2.4.0.jar +0 -0
  43. data/version.rb +2 -0
  44. metadata +196 -0
@@ -0,0 +1,231 @@
1
+ # -*- coding: utf-8 -*-
2
+
3
+ ##########################################################################################
4
+ # @author Rodrigo Botafogo
5
+ #
6
+ # Copyright © 2015 Rodrigo Botafogo. All Rights Reserved. Permission to use, copy, modify,
7
+ # and distribute this software and its documentation, without fee and without a signed
8
+ # licensing agreement, is hereby granted, provided that the above copyright notice, this
9
+ # paragraph and the following two paragraphs appear in all copies, modifications, and
10
+ # distributions.
11
+ #
12
+ # IN NO EVENT SHALL RODRIGO BOTAFOGO BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL,
13
+ # INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF
14
+ # THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF RODRIGO BOTAFOGO HAS BEEN ADVISED OF THE
15
+ # POSSIBILITY OF SUCH DAMAGE.
16
+ #
17
+ # RODRIGO BOTAFOGO SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
18
+ # THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE
19
+ # SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED HEREUNDER IS PROVIDED "AS IS".
20
+ # RODRIGO BOTAFOGO HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS,
21
+ # OR MODIFICATIONS.
22
+ ##########################################################################################
23
+
24
+ require_relative 'dimensions'
25
+
26
+ class Jcsv
27
+ include_package "org.supercsv.cellprocessor.ift"
28
+
29
+ #========================================================================================
30
+ # Mapping contains a mapping from column names to:
31
+ # * other column names: when we want to change the name of the column
32
+ # * false: when we want to remove the column from reading
33
+ # * true: when the column is a dimension
34
+ # If there is no mapping then the column number maps to itself
35
+ #========================================================================================
36
+
37
+ class Mapping
38
+
39
+ attr_accessor :mapping
40
+
41
+ def initialize
42
+ @mapping = nil
43
+ end
44
+
45
+ def [](index)
46
+ # p "#{@mapping}, #{index}"
47
+ (@mapping.nil?)? index : @mapping[index]
48
+ end
49
+
50
+ def []=(index, value)
51
+ @mapping[index] = value
52
+ end
53
+
54
+ end
55
+
56
+ #========================================================================================
57
+ # Module Processors interfaces the Ruby code with the SuperCsv cell processors.
58
+ #========================================================================================
59
+
60
+ module Processors
61
+ include_package "org.supercsv.util"
62
+ include_package "org.supercsv.exception"
63
+
64
+ attr_reader :dimensions
65
+ attr_reader :key_array
66
+
67
+ #---------------------------------------------------------------------------------------
68
+ #
69
+ #---------------------------------------------------------------------------------------
70
+
71
+ def headers
72
+ @headers ||= getHeader(true).to_a
73
+ end
74
+
75
+ #---------------------------------------------------------------------------------------
76
+ # This method uses variable @processed_columns that should be initialized in the class
77
+ # that includes this module. In the case of a list_reader for instance, processed_columns
78
+ # is initialized as an Array. For map_reader, processed_columns is initialized as a
79
+ # Hash. So, processed_columns is a data structure for storing the data processed. The
80
+ # mapping defines where the data should be stored in this data structure. In the case
81
+ # of list_reader, mapping[i] = i, for map_reader, mapping[i] = <mapping name for hash>
82
+ #---------------------------------------------------------------------------------------
83
+
84
+ def executeProcessors(processors)
85
+
86
+ source = getColumns()
87
+
88
+ context = CsvContext.new(getLineNumber(), getRowNumber(), 1);
89
+ context.setRowSource(source);
90
+
91
+ # raise "The number of columns to be processed #{source.size} must match the number of
92
+ # CellProcessors #{processors.length}" if (source.size != processors.length)
93
+
94
+ @key_array = Array.new
95
+
96
+ source.each_with_index do |s, i|
97
+ begin
98
+ # is @column_mapping[i] ever nil? I don't think so... CHECK!!!
99
+ next if ((@column_mapping[i] == false) || (@column_mapping[i].nil?))
100
+ # if column mapping is 'true', then this column is a dimension and the data in this
101
+ # column is part of the key
102
+ if (@column_mapping[i] == true)
103
+ begin
104
+ @dimensions[@headers[i]] = s
105
+ rescue RuntimeError => e
106
+ puts "Warning reading row: #{source.toString()} in field '#{@headers[i]}'. " +
107
+ e.message if !@suppress_warnings
108
+ # raise "Error reading row: #{source.toString()} in field '#{@headers[i]}'. " +
109
+ # e.message
110
+ end
111
+ @key_array[@dimensions.dimensions_names.index(@headers[i])] = s
112
+ next
113
+ end
114
+
115
+ context.setColumnNumber(i + 1)
116
+ if (i >= processors.size)
117
+ @processed_columns[@column_mapping[i]] = s
118
+ else
119
+ if (processors[i] == nil)
120
+ @processed_columns[@column_mapping[i]] = s
121
+ else
122
+ cell = processors[i].execute(s, context)
123
+ # cell = (cell.is_a? Jcsv::Pack)? cell.ruby_obj : cell
124
+ @processed_columns[@column_mapping[i]] = cell
125
+ end
126
+ end
127
+ rescue SuperCsvConstraintViolationException => e
128
+ raise Jcsv::ContraintViolation.new("Constraint violation: #{context.toString}")
129
+ end
130
+
131
+ end
132
+
133
+ @processed_columns
134
+
135
+ end
136
+
137
+ end
138
+
139
+ #========================================================================================
140
+ # Class CLR (CSV List Reader) wraps java CsvListReader.
141
+ #========================================================================================
142
+
143
+ class CLR < org.supercsv.io.CsvListReader
144
+ include_package "org.supercsv.cellprocessor.ift"
145
+ include Processors
146
+
147
+ #---------------------------------------------------------------------------------------
148
+ #
149
+ #---------------------------------------------------------------------------------------
150
+
151
+ def initialize(filereader, preferences, dimensions = nil, suppress_warnings)
152
+ @dimensions = dimensions
153
+ @suppress_warnings = suppress_warnings
154
+ super(filereader, preferences)
155
+ end
156
+
157
+ #---------------------------------------------------------------------------------------
158
+ #
159
+ #---------------------------------------------------------------------------------------
160
+
161
+ def read(column_mapping, filters)
162
+
163
+ # initialize @processed_columns to a new Array. This will be used by method
164
+ # executeProcessor from module Processors. @column_mapping also needs to be initialized
165
+ # to the column_mapping received. Used by methods in module Processors
166
+ @processed_columns = Array.new
167
+ @column_mapping = column_mapping
168
+
169
+ data_read = (filters == false)? super([].to_java(CellProcessor)) :
170
+ super(filters.values.to_java(CellProcessor))
171
+ data_read.unshift(@key_array) if dimensions && data_read
172
+ data_read
173
+ end
174
+
175
+ end
176
+
177
+ #========================================================================================
178
+ # class CMR (CSV Map Reader) wraps class CsvMapReader
179
+ #========================================================================================
180
+
181
+ class CMR < org.supercsv.io.CsvMapReader
182
+ include_package "org.supercsv.cellprocessor.ift"
183
+ include Processors
184
+
185
+ # When dimensions are defined, then the composition of all dimensions is the 'key'
186
+ # attr_reader :key
187
+
188
+ #---------------------------------------------------------------------------------------
189
+ #
190
+ #---------------------------------------------------------------------------------------
191
+
192
+ def initialize(filereader, preferences, dimensions = nil, suppress_warnings)
193
+ @dimensions = dimensions
194
+ @suppress_warnings = suppress_warnings
195
+ super(filereader, preferences)
196
+ end
197
+
198
+ #---------------------------------------------------------------------------------------
199
+ #
200
+ #---------------------------------------------------------------------------------------
201
+
202
+ def read(column_mapping, filters)
203
+
204
+ # initialize @processed_columns to a new Hash. This will be used by method
205
+ # executeProcessor from module Processors
206
+ @processed_columns = Hash.new
207
+ @column_mapping = column_mapping
208
+
209
+ (filters == false)? super(*column_mapping.mapping) :
210
+ filter_input(column_mapping, filters.values.to_java(CellProcessor))
211
+
212
+ end
213
+
214
+ #---------------------------------------------------------------------------------------
215
+ #
216
+ #---------------------------------------------------------------------------------------
217
+
218
+ def filter_input(name_mapping, processors)
219
+
220
+ if (readRow())
221
+ processed_columns = executeProcessors(processors)
222
+ processed_columns[:key] = @key_array if dimensions
223
+ return processed_columns
224
+ end
225
+
226
+ end
227
+
228
+ end
229
+
230
+ end
231
+
@@ -0,0 +1,37 @@
1
+ # -*- coding: utf-8 -*-
2
+
3
+ ##########################################################################################
4
+ # Copyright © 2013 Rodrigo Botafogo. All Rights Reserved. Permission to use, copy, modify,
5
+ # and distribute this software and its documentation, without fee and without a signed
6
+ # licensing agreement, is hereby granted, provided that the above copyright notice, this
7
+ # paragraph and the following two paragraphs appear in all copies, modifications, and
8
+ # distributions.
9
+ #
10
+ # IN NO EVENT SHALL RODRIGO BOTAFOGO BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL,
11
+ # INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF
12
+ # THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF RODRIGO BOTAFOGO HAS BEEN ADVISED OF THE
13
+ # POSSIBILITY OF SUCH DAMAGE.
14
+ #
15
+ # RODRIGO BOTAFOGO SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
16
+ # THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE
17
+ # SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED HEREUNDER IS PROVIDED "AS IS".
18
+ # RODRIGO BOTAFOGO HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS,
19
+ # OR MODIFICATIONS.
20
+ ##########################################################################################
21
+
22
+ require 'rubygems'
23
+ require "test/unit"
24
+ require 'shoulda'
25
+
26
+ require_relative '../config'
27
+ require 'jcsv'
28
+
29
+ require_relative 'test_customer_list'
30
+ require_relative 'test_customer_map'
31
+ require_relative 'test_dimensions'
32
+ require_relative 'test_deep_map'
33
+ require_relative 'test_critbit'
34
+ require_relative 'test_filters'
35
+ require_relative 'test_list_dimensions'
36
+ require_relative 'test_customer_nhlist'
37
+ require_relative 'test_mdarray'
@@ -0,0 +1,442 @@
1
+ # -*- coding: utf-8 -*-
2
+
3
+ ##########################################################################################
4
+ # Copyright © 2015 Rodrigo Botafogo. All Rights Reserved. Permission to use, copy, modify,
5
+ # and distribute this software and its documentation for educational, research, and
6
+ # not-for-profit purposes, without fee and without a signed licensing agreement, is hereby
7
+ # granted, provided that the above copyright notice, this paragraph and the following two
8
+ # paragraphs appear in all copies, modifications, and distributions. Contact Rodrigo
9
+ # Botafogo - rodrigo.a.botafogo@gmail.com for commercial licensing opportunities.
10
+ #
11
+ # IN NO EVENT SHALL RODRIGO BOTAFOGO BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL,
12
+ # INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF
13
+ # THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF RODRIGO BOTAFOGO HAS BEEN ADVISED OF THE
14
+ # POSSIBILITY OF SUCH DAMAGE.
15
+ #
16
+ # RODRIGO BOTAFOGO SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
17
+ # THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE
18
+ # SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED HEREUNDER IS PROVIDED "AS IS".
19
+ # RODRIGO BOTAFOGO HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS,
20
+ # OR MODIFICATIONS.
21
+ ##########################################################################################
22
+
23
+ require 'rubygems'
24
+ require 'test/unit'
25
+ require 'shoulda'
26
+ require 'matrix'
27
+
28
+ require 'pp'
29
+ require_relative '../config'
30
+
31
+ require 'jcsv'
32
+
33
+ class CSVTest < Test::Unit::TestCase
34
+
35
+ context "CSV test" do
36
+
37
+ setup do
38
+
39
+ end
40
+
41
+ #-------------------------------------------------------------------------------------
42
+ # When reading the CSV file in one big chunk and selecting deep_map: true, then each
43
+ # dimension will be hashed across all rows. [This is not clear at all!!! IMPROVE.]
44
+ #-------------------------------------------------------------------------------------
45
+
46
+ should "parse multi-dimension csv into a critbit, alphabetical order" do
47
+
48
+ reader = Jcsv.reader("../data/customer.csv", format: :critbit,
49
+ dimensions: [:last_name, :first_name])
50
+
51
+ customers = reader.read
52
+ assert_equal("Down.Bob", customers.keys[0])
53
+ assert_equal("Dunbar.John", customers.keys[1])
54
+
55
+ reader = Jcsv.reader("../data/customer.csv", format: :critbit,
56
+ dimensions: [:first_name, :last_name])
57
+
58
+ customers = reader.read
59
+ assert_equal("Alice.Wunderland", customers.keys[0])
60
+ assert_equal("Bill.Jobs", customers.keys[1])
61
+
62
+ end
63
+
64
+ #-------------------------------------------------------------------------------------
65
+ # Read data into a flat map. Allows random access to the data by use of the map
66
+ # 'key'. The 'key' is a string that concatenates the values of the dimensions'
67
+ # labels with a '.'.
68
+ #-------------------------------------------------------------------------------------
69
+
70
+ should "read data into flat critbit" do
71
+
72
+ reader = Jcsv.reader("../data/epilepsy.csv", format: :critbit,
73
+ dimensions: [:treatment, :subject, :period],
74
+ default_filter: Jcsv.int)
75
+
76
+ # remove the :patient field from the data, as this field is already given by the
77
+ # :subject field.
78
+ reader.mapping = {:patient => false}
79
+
80
+ # read all the data into a flat map (hash) with keys the dimensions values
81
+ # concatenated with '.'.
82
+ treatment = reader.read
83
+ # p treatment
84
+
85
+ assert_equal(11, treatment["placebo.1.1"][:base])
86
+ assert_equal(31, treatment["placebo.1.1"][:age])
87
+ assert_equal(5, treatment["placebo.1.1"][:"seizure.rate"])
88
+
89
+ assert_equal(31, treatment["Progabide.35.2"][:base])
90
+ assert_equal(30, treatment["Progabide.35.2"][:age])
91
+ assert_equal(17, treatment["Progabide.35.2"][:"seizure.rate"])
92
+
93
+ end
94
+
95
+ #-------------------------------------------------------------------------------------
96
+ # Read data into a flat map in chunks
97
+ #-------------------------------------------------------------------------------------
98
+
99
+ should "read data into flat critbit in chunks" do
100
+
101
+ # parameter deep_map: is not passed. By default it is false
102
+ reader = Jcsv.reader("../data/epilepsy.csv", format: :critbit, chunk_size: 20,
103
+ dimensions: [:treatment, :subject, :period],
104
+ default_filter: Jcsv.int)
105
+
106
+ # remove the :patient field from the data, as this field is already given by the
107
+ # :subject field.
108
+ reader.mapping = {:patient => false}
109
+ treatment = reader.read
110
+ # p treatment
111
+
112
+ treatment_type = reader.dimensions[:treatment]
113
+ subject = reader.dimensions[:subject]
114
+ period = reader.dimensions[:period]
115
+
116
+ # variable labels has all dimension labels
117
+ assert_equal(0, treatment_type.labels["placebo"])
118
+ assert_equal(1, treatment_type.labels["Progabide"])
119
+ assert_equal(1, subject.labels["2"])
120
+ assert_equal(13, subject.labels["14"])
121
+ assert_equal(58, subject.labels["59"])
122
+ assert_equal(0, period.labels["1"])
123
+ assert_equal(3, period.labels["4"])
124
+
125
+ # we now need to access the first chunk [0] to get to the desired element
126
+ assert_equal(11, treatment[0]["placebo.1.1"][:base])
127
+ assert_equal(31, treatment[0]["placebo.1.1"][:age])
128
+ assert_equal(5, treatment[0]["placebo.1.1"][:"seizure.rate"])
129
+
130
+ # chunk [0] does not have key "Progabide.35.2"
131
+ assert_equal(nil, treatment[0]["Progabide.35.2"])
132
+
133
+ assert_equal(10, treatment[6]["Progabide.32.3"][:base])
134
+ assert_equal(30, treatment[6]["Progabide.32.3"][:age])
135
+ assert_equal(1, treatment[6]["Progabide.32.3"][:"seizure.rate"])
136
+
137
+ end
138
+
139
+ #-------------------------------------------------------------------------------------
140
+ #
141
+ #-------------------------------------------------------------------------------------
142
+
143
+ should "read to critbit in enumerable chunks" do
144
+
145
+ # parameter deep_map: is not passed. By default it is false
146
+ reader = Jcsv.reader("../data/epilepsy.csv", format: :critbit, chunk_size: 20,
147
+ dimensions: [:treatment, :subject, :period],
148
+ default_filter: Jcsv.int)
149
+
150
+ # Method each without a block returns an enumerator
151
+ enum = reader.each
152
+
153
+ # read the first chunk. Chunk is of size 20
154
+ chunk = enum.next
155
+ data = chunk[2]
156
+
157
+ # in this case, only the first 20 rows were read, so only one treatment and six
158
+ # subjects were read until this point
159
+ assert_equal(1, reader.dimensions[:treatment].size)
160
+ # assert_equal(6, reader.dimensions[:subject].size)
161
+
162
+ assert_equal(8, data["placebo.4.4"][:base])
163
+ assert_equal(36, data["placebo.4.4"][:age])
164
+ assert_equal(4, data["placebo.4.4"][:"seizure.rate"])
165
+
166
+ # read the next chunk. Chunk is of size 20
167
+ chunk = enum.next
168
+
169
+ # read the next chunk... not interested in the second chunk for some reason...
170
+ chunk = enum.next
171
+ data = chunk[2]
172
+
173
+ # As we read new chunks of data, the dimensions labels accumulate, i.e., they are
174
+ # not erased between reads of every chunk (call to the next function). Dimensions
175
+ # are variables from the reader and not the chunk.
176
+ assert_equal(1, reader.dimensions[:treatment].size)
177
+ assert_equal(16, reader.dimensions[:subject].size)
178
+
179
+ assert_equal(33, data["placebo.12.2"][:base])
180
+ assert_equal(24, data["placebo.12.2"][:age])
181
+ assert_equal(6, data["placebo.12.2"][:"seizure.rate"])
182
+
183
+ end
184
+
185
+ #-------------------------------------------------------------------------------------
186
+ #
187
+ #-------------------------------------------------------------------------------------
188
+
189
+ should "read to critbit and pass to block with dimensions" do
190
+
191
+ reader = Jcsv.reader("../data/epilepsy.csv", format: :critbit,
192
+ dimensions: [:treatment, :subject, :period],
193
+ default_filter: Jcsv.int)
194
+
195
+ reader.read do |line_no, row_no, row|
196
+ assert_equal(1, row.keys.size)
197
+ end
198
+
199
+ end
200
+
201
+ #-------------------------------------------------------------------------------------
202
+ #
203
+ #-------------------------------------------------------------------------------------
204
+
205
+ should "read to critbit and pass to block with dimensions, chunk_size > 1" do
206
+
207
+ reader = Jcsv.reader("../data/epilepsy.csv", format: :critbit, chunk_size: 20,
208
+ dimensions: [:treatment, :subject, :period],
209
+ default_filter: Jcsv.int)
210
+
211
+ reader.read do |line_no, row_no, row|
212
+ assert_equal(20, row.keys.size) if line_no < 230
213
+ end
214
+
215
+ end
216
+
217
+ #-------------------------------------------------------------------------------------
218
+ #
219
+ #-------------------------------------------------------------------------------------
220
+
221
+ should "raise error if mapping a column to true in critbit" do
222
+
223
+ reader = Jcsv.reader("../data/epilepsy.csv", format: :critbit, chunk_size: :all,
224
+ dimensions: [:subject, :period],
225
+ default_filter: Jcsv.int)
226
+
227
+ # Raises an error, since mapping to true is not defined
228
+ assert_raise ( ArgumentError ) { reader.mapping =
229
+ {:treatment => false, :patient => true} }
230
+
231
+ end
232
+
233
+ #-------------------------------------------------------------------------------------
234
+ # When reading the CSV file in one big chunk and selecting deep_map: true, then each
235
+ # dimension will be hashed across all rows.
236
+ #-------------------------------------------------------------------------------------
237
+
238
+ should "parse multi-dimension csv file to critbit, chuk_size all and deep_map true" do
239
+
240
+ reader = Jcsv.reader("../data/epilepsy.csv", format: :critbit, chunk_size: :all,
241
+ dimensions: [:treatment, :subject, :period], deep_map: true)
242
+
243
+ # remove the :patient field from the data, as this field is already given by the
244
+ # :subject field.
245
+ reader.mapping = {:patient => false}
246
+
247
+ # since we are reading with chunk_size = :all, then we will only get one chunk back.
248
+ # Then we can get the first chunk by indexing read with 0: reader.read[0]
249
+ treatment = reader.read[0]
250
+ # p treatment
251
+
252
+ # get the dimensions
253
+ treatment_type = reader.dimensions[:treatment]
254
+ subject = reader.dimensions[:subject]
255
+ period = reader.dimensions[:period]
256
+
257
+ # variable labels has all dimension labels
258
+ assert_equal(0, treatment_type.labels["placebo"])
259
+ assert_equal(1, treatment_type.labels["Progabide"])
260
+ assert_equal(1, subject.labels["2"])
261
+ assert_equal(13, subject.labels["14"])
262
+ assert_equal(58, subject.labels["59"])
263
+ assert_equal(0, period.labels["1"])
264
+ assert_equal(3, period.labels["4"])
265
+
266
+ assert_equal("14", treatment["placebo"]["10"]["1"][:"seizure.rate"])
267
+
268
+ end
269
+
270
+ #-------------------------------------------------------------------------------------
271
+ #
272
+ #-------------------------------------------------------------------------------------
273
+
274
+ should "read data with dimensions, mapping and filters into a critbit" do
275
+
276
+ reader = Jcsv.reader("../data/epilepsy.csv", format: :critbit, chunk_size: :all,
277
+ dimensions: [:treatment, :subject, :period], deep_map: true,
278
+ default_filter: Jcsv.int)
279
+
280
+ # remove the :patient field from the data, as this field is already given by the
281
+ # :subject field.
282
+ reader.mapping = {:patient => false}
283
+ reader.filters = {:"seizure.rate" => Jcsv.float}
284
+
285
+ # will raise an exception as :period is not a key. Will break as soon as we read the
286
+ # first period for the second user
287
+ treatment = reader.read[0]
288
+ # p treatment
289
+
290
+ assert_equal(14.0, treatment["placebo"]["10"]["1"][:"seizure.rate"])
291
+ assert_equal(19.0, treatment["Progabide"]["45"]["1"][:"seizure.rate"])
292
+
293
+ end
294
+
295
+ #-------------------------------------------------------------------------------------
296
+ #
297
+ #-------------------------------------------------------------------------------------
298
+
299
+ should "read data with deep_map in critbit but chunk_size not all" do
300
+
301
+ reader = Jcsv.reader("../data/epilepsy.csv", format: :critbit, chunk_size: 20,
302
+ dimensions: [:treatment, :subject, :period], deep_map: true,
303
+ default_filter: Jcsv.int)
304
+
305
+ # remove the :patient field from the data, as this field is already given by the
306
+ # :subject field.
307
+ reader.mapping = {:patient => false}
308
+ reader.filters = {:"seizure.rate" => Jcsv.float}
309
+
310
+ # will raise an exception as :period is not a key. Will break as soon as we read the
311
+ # first period for the second user
312
+ treatment = reader.read
313
+
314
+ assert_equal(3.0, treatment[0]["placebo"]["2"]["1"][:"seizure.rate"])
315
+ # since only 20 rows read per chunk, there is no Progabide row yet. Note that there
316
+ # was data in the test above
317
+ assert_equal(nil, treatment[0]["Progabide"])
318
+
319
+ # chunk 10, has Progabide as a dimension
320
+ assert_equal(6.0, treatment[10]["Progabide"]["51"]["2"][:"seizure.rate"])
321
+
322
+ end
323
+
324
+ #-------------------------------------------------------------------------------------
325
+ #
326
+ #-------------------------------------------------------------------------------------
327
+
328
+ should "raise exception if key is repeated in critbit" do
329
+
330
+ reader = Jcsv.reader("../data/epilepsy.csv", format: :critbit, chunk_size: :all,
331
+ dimensions: [:period], deep_map: true)
332
+
333
+ # will raise an exception as :period is not a key. Will break as soon as we read the
334
+ # first period for the second user
335
+ assert_raise ( Jcsv::DuplicateKeyError ) { reader.read[0] }
336
+
337
+ end
338
+
339
+ #-------------------------------------------------------------------------------------
340
+ # When reading the CSV file in one big chunk and selecting deep_map: true, then each
341
+ # dimension will be hashed across all rows. [This is not clear at all!!! IMPROVE.]
342
+ #-------------------------------------------------------------------------------------
343
+
344
+ should "Show errors when dimensions are not in order or missing in critbit" do
345
+
346
+ reader = Jcsv.reader("../data/epilepsy.csv", format: :critbit, chunk_size: :all,
347
+ dimensions: [:period, :treatment, :subject], deep_map: true)
348
+
349
+ p "LOTS OF ERROR MESSAGES EXPECTED FROM HERE..."
350
+
351
+ # remove the :patient field from the data, as this field is already given by the
352
+ # :subject field.
353
+ reader.mapping = {:patient => false}
354
+
355
+ # since we are reading with chunk_size = :all, then we will only get one chunk back.
356
+ # Then we can get the first chunk by indexing read with 0: reader.read[0]
357
+ treatment = reader.read[0]
358
+
359
+ p "... TO HERE. If no error messages, then something is wrong!"
360
+
361
+ end
362
+
363
+ #-------------------------------------------------------------------------------------
364
+ # When reading the CSV file in one big chunk and selecting deep_map: true, then each
365
+ # dimension will be hashed across all rows. [This is not clear at all!!! IMPROVE.]
366
+ #-------------------------------------------------------------------------------------
367
+
368
+ should "Suppress warnings when dimensions are not in order or missing in critbit" do
369
+
370
+ reader = Jcsv.reader("../data/epilepsy.csv", format: :critbit, chunk_size: :all,
371
+ dimensions: [:period, :treatment, :subject], deep_map: true,
372
+ suppress_warnings: true)
373
+
374
+ p "No warning messages should be seen from here..."
375
+
376
+ # remove the :patient field from the data, as this field is already given by the
377
+ # :subject field.
378
+ reader.mapping = {:patient => false}
379
+
380
+ # since we are reading with chunk_size = :all, then we will only get one chunk back.
381
+ # Then we can get the first chunk by indexing read with 0: reader.read[0]
382
+ treatment = reader.read
383
+ # p treatment
384
+
385
+ p "... to here. If there are any warning messages then there is something wrong!"
386
+
387
+ end
388
+
389
+ #-------------------------------------------------------------------------------------
390
+ # There is a large difference when parsing multidimensional CSV files with chunks and
391
+ # no chunks. When no chunks are selected, this is identical to normal dimension
392
+ # reading.
393
+ #-------------------------------------------------------------------------------------
394
+
395
+ should "parse multi-dimension csv file to critbit no chunk" do
396
+
397
+ reader = Jcsv.reader("../data/epilepsy.csv", format: :critbit,
398
+ dimensions: [:treatment, :subject, :period], deep_map: true)
399
+
400
+ # remove the :patient field from the data, as this field is already given by the
401
+ # :subject field.
402
+ reader.mapping = {:patient => false}
403
+
404
+ # since we are reading with chunk_size = :all, then we will only get one chunk back.
405
+ # Then we can get the first chunk by indexing read with 0: reader.read[0]
406
+ treatment = reader.read
407
+ # p treatment
408
+
409
+ assert_equal("11", treatment["placebo.1.1"][:base])
410
+ assert_equal("31", treatment["placebo.1.1"][:age])
411
+ assert_equal("5", treatment["placebo.1.1"][:"seizure.rate"])
412
+
413
+ assert_equal("11", treatment["placebo.1.2"][:base])
414
+ assert_equal("31", treatment["placebo.1.2"][:age])
415
+ assert_equal("3", treatment["placebo.1.2"][:"seizure.rate"])
416
+
417
+ end
418
+
419
+ #-------------------------------------------------------------------------------------
420
+ # All examples until now had chunk_size :all, but they can have smaller size. In this
421
+ # example, chunk_size is 20 and it is processed by a block
422
+ #-------------------------------------------------------------------------------------
423
+
424
+ should "read with dimension and given a block in critbit" do
425
+
426
+ reader = Jcsv.reader("../data/epilepsy.csv", format: :critbit, chunk_size: 20,
427
+ dimensions: [:treatment, :subject, :period], deep_map: true,
428
+ default_filter: Jcsv.int)
429
+
430
+ reader.mapping = {:patient => false}
431
+
432
+ reader.read do |line_no, row_no, chunk|
433
+ p line_no
434
+ p row_no
435
+ p chunk
436
+ end
437
+
438
+ end
439
+
440
+ end
441
+
442
+ end