mdarray-jcsv 0.6.3-java
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/LICENSE.txt +23 -0
- data/README.md +2 -0
- data/Rakefile +46 -0
- data/config.rb +104 -0
- data/lib/constraints.rb +205 -0
- data/lib/date_filters.rb +252 -0
- data/lib/dimensions.rb +276 -0
- data/lib/filters.rb +332 -0
- data/lib/jcsv.rb +107 -0
- data/lib/list_reader.rb +200 -0
- data/lib/locale.rb +192 -0
- data/lib/map_reader.rb +192 -0
- data/lib/mdarray-jcsv.rb +24 -0
- data/lib/mdarray_reader.rb +110 -0
- data/lib/numeric_filters.rb +225 -0
- data/lib/reader.rb +547 -0
- data/lib/supercsv_interface.rb +231 -0
- data/test/test_complete.rb +37 -0
- data/test/test_critbit.rb +442 -0
- data/test/test_customer_list.rb +436 -0
- data/test/test_customer_map.rb +209 -0
- data/test/test_customer_nhlist.rb +161 -0
- data/test/test_deep_map.rb +264 -0
- data/test/test_del.rb +73 -0
- data/test/test_dimensions.rb +231 -0
- data/test/test_example.rb +79 -0
- data/test/test_filters.rb +374 -0
- data/test/test_list_dimensions.rb +110 -0
- data/test/test_mdarray.rb +227 -0
- data/test/test_missing_data.rb +57 -0
- data/vendor/commons-beanutils-1.8.3.jar +0 -0
- data/vendor/commons-lang3-3.1.jar +0 -0
- data/vendor/dozer-5.4.0.jar +0 -0
- data/vendor/jcl-over-slf4j-1.6.6.jar +0 -0
- data/vendor/joda-time-2.7.jar +0 -0
- data/vendor/slf4j-api-1.7.5.jar +0 -0
- data/vendor/snakeyaml-1.14.jar +0 -0
- data/vendor/super-csv-2.4.0.jar +0 -0
- data/vendor/super-csv-dozer-2.4.0.jar +0 -0
- data/vendor/super-csv-java8-2.4.0.jar +0 -0
- data/vendor/super-csv-joda-2.4.0.jar +0 -0
- data/version.rb +2 -0
- metadata +196 -0
@@ -0,0 +1,231 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
|
3
|
+
##########################################################################################
|
4
|
+
# @author Rodrigo Botafogo
|
5
|
+
#
|
6
|
+
# Copyright © 2015 Rodrigo Botafogo. All Rights Reserved. Permission to use, copy, modify,
|
7
|
+
# and distribute this software and its documentation, without fee and without a signed
|
8
|
+
# licensing agreement, is hereby granted, provided that the above copyright notice, this
|
9
|
+
# paragraph and the following two paragraphs appear in all copies, modifications, and
|
10
|
+
# distributions.
|
11
|
+
#
|
12
|
+
# IN NO EVENT SHALL RODRIGO BOTAFOGO BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL,
|
13
|
+
# INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF
|
14
|
+
# THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF RODRIGO BOTAFOGO HAS BEEN ADVISED OF THE
|
15
|
+
# POSSIBILITY OF SUCH DAMAGE.
|
16
|
+
#
|
17
|
+
# RODRIGO BOTAFOGO SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
|
18
|
+
# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE
|
19
|
+
# SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED HEREUNDER IS PROVIDED "AS IS".
|
20
|
+
# RODRIGO BOTAFOGO HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS,
|
21
|
+
# OR MODIFICATIONS.
|
22
|
+
##########################################################################################
|
23
|
+
|
24
|
+
require_relative 'dimensions'
|
25
|
+
|
26
|
+
class Jcsv
|
27
|
+
include_package "org.supercsv.cellprocessor.ift"
|
28
|
+
|
29
|
+
#========================================================================================
|
30
|
+
# Mapping contains a mapping from column names to:
|
31
|
+
# * other column names: when we want to change the name of the column
|
32
|
+
# * false: when we want to remove the column from reading
|
33
|
+
# * true: when the column is a dimension
|
34
|
+
# If there is no mapping then the column number maps to itself
|
35
|
+
#========================================================================================
|
36
|
+
|
37
|
+
# Translates a column index into the place where that column's value must be
# stored: another name, +false+ (column dropped) or +true+ (column is a
# dimension). While no mapping has been assigned, every index maps to itself.
class Mapping

  # Underlying storage; nil until a mapping is assigned or first written via []=.
  attr_accessor :mapping

  def initialize
    @mapping = nil
  end

  # Returns the entry stored for +index+, or +index+ itself when no mapping
  # has been set at all.
  def [](index)
    # p "#{@mapping}, #{index}"
    @mapping.nil? ? index : @mapping[index]
  end

  # Stores +value+ at +index+. The storage is created on first use so that
  # writing to a fresh Mapping no longer fails with NoMethodError on nil.
  # NOTE(review): an Array is used because the reader indexes columns by
  # integer position; confirm no caller writes with non-integer keys.
  def []=(index, value)
    @mapping ||= []
    @mapping[index] = value
  end

end
|
55
|
+
|
56
|
+
#========================================================================================
|
57
|
+
# Module Processors interfaces the Ruby code with the SuperCsv cell processors.
|
58
|
+
#========================================================================================
|
59
|
+
|
60
|
+
module Processors
  include_package "org.supercsv.util"
  include_package "org.supercsv.exception"

  attr_reader :dimensions
  attr_reader :key_array

  #---------------------------------------------------------------------------------------
  # Header row of the file as a Ruby Array, read once through getHeader(true) and cached.
  #---------------------------------------------------------------------------------------

  def headers
    @headers = getHeader(true).to_a unless @headers
    @headers
  end

  #---------------------------------------------------------------------------------------
  # Runs the given cell processors over the columns of the current row and stores the
  # results in @processed_columns, which the including class must initialize before the
  # call (CLR uses an Array, CMR a Hash). @column_mapping[i] gives the slot/key under
  # which column i is stored:
  #   * false or nil: the column is skipped
  #   * true: the column is a dimension; its value is registered in @dimensions and
  #     placed in @key_array at the position of that dimension
  #   * anything else: used as the index/key into @processed_columns
  # Columns without a processor (nil entry, or index past the end of +processors+) are
  # stored unprocessed. Returns @processed_columns.
  # Raises Jcsv::ContraintViolation when a processor reports a constraint violation.
  #---------------------------------------------------------------------------------------

  def executeProcessors(processors)

    row = getColumns()

    context = CsvContext.new(getLineNumber(), getRowNumber(), 1)
    context.setRowSource(row)

    @key_array = []

    row.each_with_index do |value, col|
      begin
        target = @column_mapping[col]
        # false/nil mapping: column is not read
        next unless target

        if target == true
          # dimension column: its value is part of the row key
          name = @headers[col]
          begin
            @dimensions[name] = value
          rescue RuntimeError => err
            # a failing dimension update is only reported, never fatal
            unless @suppress_warnings
              puts "Warning reading row: #{row.toString()} in field '#{name}'. " +
                   err.message
            end
          end
          @key_array[@dimensions.dimensions_names.index(name)] = value
          next
        end

        context.setColumnNumber(col + 1)
        # guard the index: there may be fewer processors than columns
        processor = (col < processors.size) ? processors[col] : nil
        @processed_columns[target] =
          processor.nil? ? value : processor.execute(value, context)
      rescue SuperCsvConstraintViolationException
        raise Jcsv::ContraintViolation.new("Constraint violation: #{context.toString}")
      end
    end

    @processed_columns

  end

end
|
138
|
+
|
139
|
+
#========================================================================================
|
140
|
+
# Class CLR (CSV List Reader) wraps java CsvListReader.
|
141
|
+
#========================================================================================
|
142
|
+
|
143
|
+
class CLR < org.supercsv.io.CsvListReader
  include_package "org.supercsv.cellprocessor.ift"
  include Processors

  #---------------------------------------------------------------------------------------
  # Keeps the dimensions object and the warning flag (both read by module Processors)
  # and hands the file reader and preferences to the Java CsvListReader constructor.
  #---------------------------------------------------------------------------------------

  def initialize(filereader, preferences, dimensions = nil, suppress_warnings)
    @dimensions, @suppress_warnings = dimensions, suppress_warnings
    super(filereader, preferences)
  end

  #---------------------------------------------------------------------------------------
  # Reads one row. +filters+ is either false (no cell processors) or an object whose
  # #values are the cell processors. @processed_columns (an Array for this reader) and
  # @column_mapping are set here because Processors#executeProcessors relies on them.
  # When dimensions are in use and a row was read, the key array is prepended to it.
  #---------------------------------------------------------------------------------------

  def read(column_mapping, filters)

    @column_mapping = column_mapping
    @processed_columns = []

    cell_processors = (filters == false) ? [] : filters.values
    row = super(cell_processors.to_java(CellProcessor))
    row.unshift(@key_array) if row && dimensions
    row

  end

end
|
176
|
+
|
177
|
+
#========================================================================================
|
178
|
+
# class CMR (CSV Map Reader) wraps class CsvMapReader
|
179
|
+
#========================================================================================
|
180
|
+
|
181
|
+
class CMR < org.supercsv.io.CsvMapReader
  include_package "org.supercsv.cellprocessor.ift"
  include Processors

  # When dimensions are defined, then the composition of all dimensions is the 'key'
  # attr_reader :key

  #---------------------------------------------------------------------------------------
  # Keeps the dimensions object and the warning flag (both read by module Processors)
  # and hands the file reader and preferences to the Java CsvMapReader constructor.
  #---------------------------------------------------------------------------------------

  def initialize(filereader, preferences, dimensions = nil, suppress_warnings)
    @dimensions, @suppress_warnings = dimensions, suppress_warnings
    super(filereader, preferences)
  end

  #---------------------------------------------------------------------------------------
  # Reads one row. With filters == false the Java reader is called directly with the
  # name mapping; otherwise the row goes through filter_input with the filters' values
  # as cell processors. @processed_columns (a Hash for this reader) and @column_mapping
  # are set here because Processors#executeProcessors relies on them.
  #---------------------------------------------------------------------------------------

  def read(column_mapping, filters)

    @column_mapping = column_mapping
    @processed_columns = {}

    return super(*column_mapping.mapping) if filters == false

    filter_input(column_mapping, filters.values.to_java(CellProcessor))

  end

  #---------------------------------------------------------------------------------------
  # Reads the next row and runs the processors over it. Returns the processed columns,
  # with the dimension key array stored under :key when dimensions are in use, or nil
  # when readRow reports that no row was read. +name_mapping+ is not used here.
  #---------------------------------------------------------------------------------------

  def filter_input(name_mapping, processors)

    return nil unless readRow()

    row = executeProcessors(processors)
    row[:key] = @key_array if dimensions
    row

  end

end
|
229
|
+
|
230
|
+
end
|
231
|
+
|
@@ -0,0 +1,37 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
|
3
|
+
##########################################################################################
|
4
|
+
# Copyright © 2013 Rodrigo Botafogo. All Rights Reserved. Permission to use, copy, modify,
|
5
|
+
# and distribute this software and its documentation, without fee and without a signed
|
6
|
+
# licensing agreement, is hereby granted, provided that the above copyright notice, this
|
7
|
+
# paragraph and the following two paragraphs appear in all copies, modifications, and
|
8
|
+
# distributions.
|
9
|
+
#
|
10
|
+
# IN NO EVENT SHALL RODRIGO BOTAFOGO BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL,
|
11
|
+
# INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF
|
12
|
+
# THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF RODRIGO BOTAFOGO HAS BEEN ADVISED OF THE
|
13
|
+
# POSSIBILITY OF SUCH DAMAGE.
|
14
|
+
#
|
15
|
+
# RODRIGO BOTAFOGO SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
|
16
|
+
# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE
|
17
|
+
# SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED HEREUNDER IS PROVIDED "AS IS".
|
18
|
+
# RODRIGO BOTAFOGO HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS,
|
19
|
+
# OR MODIFICATIONS.
|
20
|
+
##########################################################################################
|
21
|
+
|
22
|
+
require 'rubygems'
|
23
|
+
require "test/unit"
|
24
|
+
require 'shoulda'
|
25
|
+
|
26
|
+
require_relative '../config'
|
27
|
+
require 'jcsv'
|
28
|
+
|
29
|
+
require_relative 'test_customer_list'
|
30
|
+
require_relative 'test_customer_map'
|
31
|
+
require_relative 'test_dimensions'
|
32
|
+
require_relative 'test_deep_map'
|
33
|
+
require_relative 'test_critbit'
|
34
|
+
require_relative 'test_filters'
|
35
|
+
require_relative 'test_list_dimensions'
|
36
|
+
require_relative 'test_customer_nhlist'
|
37
|
+
require_relative 'test_mdarray'
|
@@ -0,0 +1,442 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
|
3
|
+
##########################################################################################
|
4
|
+
# Copyright © 2015 Rodrigo Botafogo. All Rights Reserved. Permission to use, copy, modify,
|
5
|
+
# and distribute this software and its documentation for educational, research, and
|
6
|
+
# not-for-profit purposes, without fee and without a signed licensing agreement, is hereby
|
7
|
+
# granted, provided that the above copyright notice, this paragraph and the following two
|
8
|
+
# paragraphs appear in all copies, modifications, and distributions. Contact Rodrigo
|
9
|
+
# Botafogo - rodrigo.a.botafogo@gmail.com for commercial licensing opportunities.
|
10
|
+
#
|
11
|
+
# IN NO EVENT SHALL RODRIGO BOTAFOGO BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL,
|
12
|
+
# INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF
|
13
|
+
# THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF RODRIGO BOTAFOGO HAS BEEN ADVISED OF THE
|
14
|
+
# POSSIBILITY OF SUCH DAMAGE.
|
15
|
+
#
|
16
|
+
# RODRIGO BOTAFOGO SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
|
17
|
+
# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE
|
18
|
+
# SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED HEREUNDER IS PROVIDED "AS IS".
|
19
|
+
# RODRIGO BOTAFOGO HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS,
|
20
|
+
# OR MODIFICATIONS.
|
21
|
+
##########################################################################################
|
22
|
+
|
23
|
+
require 'rubygems'
|
24
|
+
require 'test/unit'
|
25
|
+
require 'shoulda'
|
26
|
+
require 'matrix'
|
27
|
+
|
28
|
+
require 'pp'
|
29
|
+
require_relative '../config'
|
30
|
+
|
31
|
+
require 'jcsv'
|
32
|
+
|
33
|
+
class CSVTest < Test::Unit::TestCase
|
34
|
+
|
35
|
+
context "CSV test" do
|
36
|
+
|
37
|
+
setup do
|
38
|
+
|
39
|
+
end
|
40
|
+
|
41
|
+
#-------------------------------------------------------------------------------------
|
42
|
+
# When reading the CSV file in one big chunk and selecting deep_map: true, then each
|
43
|
+
# dimension will be hashed across all rows. [This is not clear at all!!! IMPROVE.]
|
44
|
+
#-------------------------------------------------------------------------------------
|
45
|
+
|
46
|
+
should "parse multi-dimension csv into a critbit, alphabetical order" do

  # The key order depends on the order in which the dimensions are listed:
  # last name first...
  by_last_name = Jcsv.reader("../data/customer.csv", format: :critbit,
                             dimensions: [:last_name, :first_name]).read
  assert_equal("Down.Bob", by_last_name.keys[0])
  assert_equal("Dunbar.John", by_last_name.keys[1])

  # ... and first name first.
  by_first_name = Jcsv.reader("../data/customer.csv", format: :critbit,
                              dimensions: [:first_name, :last_name]).read
  assert_equal("Alice.Wunderland", by_first_name.keys[0])
  assert_equal("Bill.Jobs", by_first_name.keys[1])

end
|
63
|
+
|
64
|
+
#-------------------------------------------------------------------------------------
|
65
|
+
# Read data into a flat map. Allows random access to the data by use of the map
|
66
|
+
# 'key'. The 'key' is a string that concatenates the values of the dimensions'
|
67
|
+
# labels with a '.'.
|
68
|
+
#-------------------------------------------------------------------------------------
|
69
|
+
|
70
|
+
should "read data into flat critbit" do

  reader = Jcsv.reader("../data/epilepsy.csv", format: :critbit,
                       dimensions: [:treatment, :subject, :period],
                       default_filter: Jcsv.int)

  # :patient duplicates the information in :subject, so drop it
  reader.mapping = { :patient => false }

  # flat map keyed by the dimension values joined with '.'
  treatment = reader.read

  expected = {
    "placebo.1.1"    => { :base => 11, :age => 31, :"seizure.rate" => 5 },
    "Progabide.35.2" => { :base => 31, :age => 30, :"seizure.rate" => 17 }
  }

  expected.each do |key, fields|
    fields.each { |field, value| assert_equal(value, treatment[key][field]) }
  end

end
|
94
|
+
|
95
|
+
#-------------------------------------------------------------------------------------
|
96
|
+
# Read data into a flat map in chunks
|
97
|
+
#-------------------------------------------------------------------------------------
|
98
|
+
|
99
|
+
should "read data into flat critbit in chunks" do

  # deep_map: is not given here; per the original note it defaults to false
  reader = Jcsv.reader("../data/epilepsy.csv", format: :critbit, chunk_size: 20,
                       dimensions: [:treatment, :subject, :period],
                       default_filter: Jcsv.int)

  # :patient duplicates the information in :subject, so drop it
  reader.mapping = { :patient => false }
  chunks = reader.read

  # each dimension maps its labels to an index
  treatment_labels = reader.dimensions[:treatment].labels
  subject_labels   = reader.dimensions[:subject].labels
  period_labels    = reader.dimensions[:period].labels

  assert_equal(0, treatment_labels["placebo"])
  assert_equal(1, treatment_labels["Progabide"])
  assert_equal(1, subject_labels["2"])
  assert_equal(13, subject_labels["14"])
  assert_equal(58, subject_labels["59"])
  assert_equal(0, period_labels["1"])
  assert_equal(3, period_labels["4"])

  # the data is split in chunks, so elements are reached through their chunk
  first = chunks[0]
  assert_equal(11, first["placebo.1.1"][:base])
  assert_equal(31, first["placebo.1.1"][:age])
  assert_equal(5, first["placebo.1.1"][:"seizure.rate"])

  # chunk [0] does not have key "Progabide.35.2"
  assert_equal(nil, first["Progabide.35.2"])

  seventh = chunks[6]
  assert_equal(10, seventh["Progabide.32.3"][:base])
  assert_equal(30, seventh["Progabide.32.3"][:age])
  assert_equal(1, seventh["Progabide.32.3"][:"seizure.rate"])

end
|
138
|
+
|
139
|
+
#-------------------------------------------------------------------------------------
|
140
|
+
#
|
141
|
+
#-------------------------------------------------------------------------------------
|
142
|
+
|
143
|
+
should "read to critbit in enumerable chunks" do

  # deep_map: is not given here; per the original note it defaults to false
  reader = Jcsv.reader("../data/epilepsy.csv", format: :critbit, chunk_size: 20,
                       dimensions: [:treatment, :subject, :period],
                       default_filter: Jcsv.int)

  # each without a block gives back an enumerator over the chunks
  chunk_enum = reader.each

  # first chunk (20 rows); the data is the third element of what next returns
  first_data = chunk_enum.next[2]

  # only one treatment has been seen after the first 20 rows
  assert_equal(1, reader.dimensions[:treatment].size)
  # assert_equal(6, reader.dimensions[:subject].size)

  assert_equal(8, first_data["placebo.4.4"][:base])
  assert_equal(36, first_data["placebo.4.4"][:age])
  assert_equal(4, first_data["placebo.4.4"][:"seizure.rate"])

  # the second chunk is read and discarded
  chunk_enum.next

  # third chunk
  third_data = chunk_enum.next[2]

  # Dimension labels belong to the reader, not to a chunk: they keep accumulating
  # from one call of next to the following one.
  assert_equal(1, reader.dimensions[:treatment].size)
  assert_equal(16, reader.dimensions[:subject].size)

  assert_equal(33, third_data["placebo.12.2"][:base])
  assert_equal(24, third_data["placebo.12.2"][:age])
  assert_equal(6, third_data["placebo.12.2"][:"seizure.rate"])

end
|
184
|
+
|
185
|
+
#-------------------------------------------------------------------------------------
|
186
|
+
#
|
187
|
+
#-------------------------------------------------------------------------------------
|
188
|
+
|
189
|
+
should "read to critbit and pass to block with dimensions" do

  options = { format: :critbit,
              dimensions: [:treatment, :subject, :period],
              default_filter: Jcsv.int }
  reader = Jcsv.reader("../data/epilepsy.csv", options)

  # no chunk_size given: every yielded map is expected to hold a single key
  reader.read { |line_no, row_no, row| assert_equal(1, row.keys.size) }

end
|
200
|
+
|
201
|
+
#-------------------------------------------------------------------------------------
|
202
|
+
#
|
203
|
+
#-------------------------------------------------------------------------------------
|
204
|
+
|
205
|
+
should "read to critbit and pass to block with dimensions, chunk_size > 1" do

  options = { format: :critbit, chunk_size: 20,
              dimensions: [:treatment, :subject, :period],
              default_filter: Jcsv.int }
  reader = Jcsv.reader("../data/epilepsy.csv", options)

  # chunks yielded before line 230 must be full (20 keys); later ones are not
  # checked - presumably the final chunk is shorter
  reader.read do |line_no, row_no, row|
    next unless line_no < 230
    assert_equal(20, row.keys.size)
  end

end
|
216
|
+
|
217
|
+
#-------------------------------------------------------------------------------------
|
218
|
+
#
|
219
|
+
#-------------------------------------------------------------------------------------
|
220
|
+
|
221
|
+
should "raise error if mapping a column to true in critbit" do

  reader = Jcsv.reader("../data/epilepsy.csv", format: :critbit, chunk_size: :all,
                       dimensions: [:subject, :period],
                       default_filter: Jcsv.int)

  # mapping a column to true is not something a user may request
  assert_raise(ArgumentError) do
    reader.mapping = { :treatment => false, :patient => true }
  end

end
|
232
|
+
|
233
|
+
#-------------------------------------------------------------------------------------
|
234
|
+
# When reading the CSV file in one big chunk and selecting deep_map: true, then each
|
235
|
+
# dimension will be hashed across all rows.
|
236
|
+
#-------------------------------------------------------------------------------------
|
237
|
+
|
238
|
+
should "parse multi-dimension csv file to critbit, chuk_size all and deep_map true" do

  reader = Jcsv.reader("../data/epilepsy.csv", format: :critbit, chunk_size: :all,
                       dimensions: [:treatment, :subject, :period], deep_map: true)

  # :patient duplicates the information in :subject, so drop it
  reader.mapping = { :patient => false }

  # chunk_size :all gives a single chunk back; take it with [0]
  treatment = reader.read[0]

  # label => index tables of each dimension
  treatment_labels = reader.dimensions[:treatment].labels
  subject_labels   = reader.dimensions[:subject].labels
  period_labels    = reader.dimensions[:period].labels

  assert_equal(0, treatment_labels["placebo"])
  assert_equal(1, treatment_labels["Progabide"])
  assert_equal(1, subject_labels["2"])
  assert_equal(13, subject_labels["14"])
  assert_equal(58, subject_labels["59"])
  assert_equal(0, period_labels["1"])
  assert_equal(3, period_labels["4"])

  # no filter was given, so the value is still a string; with deep_map the
  # data is reached one dimension at a time
  assert_equal("14", treatment["placebo"]["10"]["1"][:"seizure.rate"])

end
|
269
|
+
|
270
|
+
#-------------------------------------------------------------------------------------
|
271
|
+
#
|
272
|
+
#-------------------------------------------------------------------------------------
|
273
|
+
|
274
|
+
should "read data with dimensions, mapping and filters into a critbit" do

  reader = Jcsv.reader("../data/epilepsy.csv", format: :critbit, chunk_size: :all,
                       dimensions: [:treatment, :subject, :period], deep_map: true,
                       default_filter: Jcsv.int)

  # :patient duplicates the information in :subject, so drop it; seizure.rate
  # gets its own float filter
  reader.mapping = { :patient => false }
  reader.filters = { :"seizure.rate" => Jcsv.float }

  # chunk_size :all gives a single chunk back; take it with [0]
  treatment = reader.read[0]

  rate = :"seizure.rate"
  assert_equal(14.0, treatment["placebo"]["10"]["1"][rate])
  assert_equal(19.0, treatment["Progabide"]["45"]["1"][rate])

end
|
294
|
+
|
295
|
+
#-------------------------------------------------------------------------------------
|
296
|
+
#
|
297
|
+
#-------------------------------------------------------------------------------------
|
298
|
+
|
299
|
+
should "read data with deep_map in critbit but chunk_size not all" do

  reader = Jcsv.reader("../data/epilepsy.csv", format: :critbit, chunk_size: 20,
                       dimensions: [:treatment, :subject, :period], deep_map: true,
                       default_filter: Jcsv.int)

  # :patient duplicates the information in :subject, so drop it; seizure.rate
  # gets its own float filter
  reader.mapping = { :patient => false }
  reader.filters = { :"seizure.rate" => Jcsv.float }

  # with chunk_size 20 the result is a list of deep maps, one per chunk
  chunks = reader.read
  rate = :"seizure.rate"

  assert_equal(3.0, chunks[0]["placebo"]["2"]["1"][rate])
  # the first 20 rows hold no Progabide row yet (the single-chunk read of the
  # same file does contain that treatment)
  assert_equal(nil, chunks[0]["Progabide"])

  # chunk 10, has Progabide as a dimension
  assert_equal(6.0, chunks[10]["Progabide"]["51"]["2"][rate])

end
|
323
|
+
|
324
|
+
#-------------------------------------------------------------------------------------
|
325
|
+
#
|
326
|
+
#-------------------------------------------------------------------------------------
|
327
|
+
|
328
|
+
should "raise exception if key is repeated in critbit" do

  reader = Jcsv.reader("../data/epilepsy.csv", format: :critbit, chunk_size: :all,
                       dimensions: [:period], deep_map: true)

  # :period alone is not a key: the read breaks as soon as the first period of
  # the second subject shows up
  assert_raise(Jcsv::DuplicateKeyError) do
    reader.read[0]
  end

end
|
338
|
+
|
339
|
+
#-------------------------------------------------------------------------------------
|
340
|
+
# When reading the CSV file in one big chunk and selecting deep_map: true, then each
|
341
|
+
# dimension will be hashed across all rows. [This is not clear at all!!! IMPROVE.]
|
342
|
+
#-------------------------------------------------------------------------------------
|
343
|
+
|
344
|
+
should "Show errors when dimensions are not in order or missing in critbit" do

  # dimensions are listed in an order that differs from the other tests, which
  # is expected to make the reader print warnings; nothing is asserted, the
  # output has to be inspected by eye
  reader = Jcsv.reader("../data/epilepsy.csv", format: :critbit, chunk_size: :all,
                       dimensions: [:period, :treatment, :subject], deep_map: true)

  p "LOTS OF ERROR MESSAGES EXPECTED FROM HERE..."

  # :patient duplicates the information in :subject, so drop it
  reader.mapping = { :patient => false }

  # chunk_size :all gives a single chunk back; [0] selects it (result unused)
  reader.read[0]

  p "... TO HERE. If no error messages, then something is wrong!"

end
|
362
|
+
|
363
|
+
#-------------------------------------------------------------------------------------
|
364
|
+
# When reading the CSV file in one big chunk and selecting deep_map: true, then each
|
365
|
+
# dimension will be hashed across all rows. [This is not clear at all!!! IMPROVE.]
|
366
|
+
#-------------------------------------------------------------------------------------
|
367
|
+
|
368
|
+
should "Suppress warnings when dimensions are not in order or missing in critbit" do

  # same out-of-order dimensions as the warning test, but with
  # suppress_warnings: true; nothing is asserted, the output has to be
  # inspected by eye
  reader = Jcsv.reader("../data/epilepsy.csv", format: :critbit, chunk_size: :all,
                       dimensions: [:period, :treatment, :subject], deep_map: true,
                       suppress_warnings: true)

  p "No warning messages should be seen from here..."

  # :patient duplicates the information in :subject, so drop it
  reader.mapping = { :patient => false }

  # read everything; the result itself is not inspected
  reader.read

  p "... to here. If there are any warning messages then there is something wrong!"

end
|
388
|
+
|
389
|
+
#-------------------------------------------------------------------------------------
|
390
|
+
# There is a large difference when parsing multidimensional CSV files with chunks and
|
391
|
+
# no chunks. When no chunks are selected, this is identical to normal dimension
|
392
|
+
# reading.
|
393
|
+
#-------------------------------------------------------------------------------------
|
394
|
+
|
395
|
+
should "parse multi-dimension csv file to critbit no chunk" do

  reader = Jcsv.reader("../data/epilepsy.csv", format: :critbit,
                       dimensions: [:treatment, :subject, :period], deep_map: true)

  # :patient duplicates the information in :subject, so drop it
  reader.mapping = { :patient => false }

  # no chunk_size is given: the result is indexed directly by the dotted key,
  # and without filters the values are still strings
  treatment = reader.read

  expected = {
    "placebo.1.1" => { :base => "11", :age => "31", :"seizure.rate" => "5" },
    "placebo.1.2" => { :base => "11", :age => "31", :"seizure.rate" => "3" }
  }

  expected.each do |key, fields|
    fields.each { |field, value| assert_equal(value, treatment[key][field]) }
  end

end
|
418
|
+
|
419
|
+
#-------------------------------------------------------------------------------------
|
420
|
+
# All examples until now had chunk_size :all, but they can have smaller size. In this
|
421
|
+
# example, chunk_size is 20 and it is processed by a block
|
422
|
+
#-------------------------------------------------------------------------------------
|
423
|
+
|
424
|
+
should "read with dimension and given a block in critbit" do

  reader = Jcsv.reader("../data/epilepsy.csv", format: :critbit, chunk_size: 20,
                       dimensions: [:treatment, :subject, :period], deep_map: true,
                       default_filter: Jcsv.int)

  reader.mapping = { :patient => false }

  # nothing is asserted: every chunk is only printed with its line and row number
  reader.read do |line_no, row_no, chunk|
    [line_no, row_no, chunk].each { |item| p item }
  end

end
|
439
|
+
|
440
|
+
end
|
441
|
+
|
442
|
+
end
|