mdarray-jcsv 0.6.3-java
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/LICENSE.txt +23 -0
- data/README.md +2 -0
- data/Rakefile +46 -0
- data/config.rb +104 -0
- data/lib/constraints.rb +205 -0
- data/lib/date_filters.rb +252 -0
- data/lib/dimensions.rb +276 -0
- data/lib/filters.rb +332 -0
- data/lib/jcsv.rb +107 -0
- data/lib/list_reader.rb +200 -0
- data/lib/locale.rb +192 -0
- data/lib/map_reader.rb +192 -0
- data/lib/mdarray-jcsv.rb +24 -0
- data/lib/mdarray_reader.rb +110 -0
- data/lib/numeric_filters.rb +225 -0
- data/lib/reader.rb +547 -0
- data/lib/supercsv_interface.rb +231 -0
- data/test/test_complete.rb +37 -0
- data/test/test_critbit.rb +442 -0
- data/test/test_customer_list.rb +436 -0
- data/test/test_customer_map.rb +209 -0
- data/test/test_customer_nhlist.rb +161 -0
- data/test/test_deep_map.rb +264 -0
- data/test/test_del.rb +73 -0
- data/test/test_dimensions.rb +231 -0
- data/test/test_example.rb +79 -0
- data/test/test_filters.rb +374 -0
- data/test/test_list_dimensions.rb +110 -0
- data/test/test_mdarray.rb +227 -0
- data/test/test_missing_data.rb +57 -0
- data/vendor/commons-beanutils-1.8.3.jar +0 -0
- data/vendor/commons-lang3-3.1.jar +0 -0
- data/vendor/dozer-5.4.0.jar +0 -0
- data/vendor/jcl-over-slf4j-1.6.6.jar +0 -0
- data/vendor/joda-time-2.7.jar +0 -0
- data/vendor/slf4j-api-1.7.5.jar +0 -0
- data/vendor/snakeyaml-1.14.jar +0 -0
- data/vendor/super-csv-2.4.0.jar +0 -0
- data/vendor/super-csv-dozer-2.4.0.jar +0 -0
- data/vendor/super-csv-java8-2.4.0.jar +0 -0
- data/vendor/super-csv-joda-2.4.0.jar +0 -0
- data/version.rb +2 -0
- metadata +196 -0
@@ -0,0 +1,231 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
|
3
|
+
##########################################################################################
|
4
|
+
# @author Rodrigo Botafogo
|
5
|
+
#
|
6
|
+
# Copyright © 2015 Rodrigo Botafogo. All Rights Reserved. Permission to use, copy, modify,
|
7
|
+
# and distribute this software and its documentation, without fee and without a signed
|
8
|
+
# licensing agreement, is hereby granted, provided that the above copyright notice, this
|
9
|
+
# paragraph and the following two paragraphs appear in all copies, modifications, and
|
10
|
+
# distributions.
|
11
|
+
#
|
12
|
+
# IN NO EVENT SHALL RODRIGO BOTAFOGO BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL,
|
13
|
+
# INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF
|
14
|
+
# THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF RODRIGO BOTAFOGO HAS BEEN ADVISED OF THE
|
15
|
+
# POSSIBILITY OF SUCH DAMAGE.
|
16
|
+
#
|
17
|
+
# RODRIGO BOTAFOGO SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
|
18
|
+
# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE
|
19
|
+
# SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED HEREUNDER IS PROVIDED "AS IS".
|
20
|
+
# RODRIGO BOTAFOGO HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS,
|
21
|
+
# OR MODIFICATIONS.
|
22
|
+
##########################################################################################
|
23
|
+
|
24
|
+
require_relative 'dimensions'
|
25
|
+
|
26
|
+
class Jcsv
|
27
|
+
include_package "org.supercsv.cellprocessor.ift"
|
28
|
+
|
29
|
+
#========================================================================================
|
30
|
+
# Mapping contains a mapping from column names to:
|
31
|
+
# * other column names: when we want to change the name of the column
|
32
|
+
# * false: when we want to remove the column from reading
|
33
|
+
# * true: when the column is a dimensions
|
34
|
+
# If there is no mapping then the column number maps to itself
|
35
|
+
#========================================================================================
|
36
|
+
|
37
|
+
# Mapping from a column index (or name) to its read-time target.
# Semantics of a stored target:
#   * another name  -> rename the column on read
#   * false         -> drop the column from the output
#   * true          -> treat the column as a dimension (part of the key)
# While no mapping container has been installed, lookups are the identity.
class Mapping

  attr_accessor :mapping

  # Begins with no explicit mapping, so every index maps to itself.
  def initialize
    @mapping = nil
  end

  # Returns the target registered for +index+, or +index+ itself when no
  # mapping container has been installed yet.
  def [](index)
    return index if @mapping.nil?
    @mapping[index]
  end

  # Registers +value+ as the target for +index+.
  # NOTE(review): assumes a mapping container was assigned via #mapping=
  # beforehand; raises NoMethodError on nil otherwise — confirm callers.
  def []=(index, value)
    @mapping[index] = value
  end

end
|
55
|
+
|
56
|
+
#========================================================================================
|
57
|
+
# Module Processors interfaces the Ruby code with the SuperCsv cell processors.
|
58
|
+
#========================================================================================
|
59
|
+
|
60
|
+
# Module Processors interfaces the Ruby side with the SuperCSV cell
# processors. It is mixed into CLR/CMR (subclasses of the SuperCSV Java
# readers), so Java methods such as getColumns, getLineNumber, getRowNumber
# and getHeader are inherited from the host class, not defined here.
#
# The including class must initialize:
#   @processed_columns - container for processed data (Array for CLR,
#                        Hash for CMR)
#   @column_mapping    - a Mapping from column index to target
#   @dimensions        - dimensions object or nil
#   @suppress_warnings - when true, dimension read warnings are not printed
module Processors
  include_package "org.supercsv.util"
  include_package "org.supercsv.exception"

  # dimensions: dimension definitions shared with the reader (may be nil)
  attr_reader :dimensions
  # key_array: per-row values of the dimension columns, in dimension order
  attr_reader :key_array

  #---------------------------------------------------------------------------------------
  # Returns the CSV header row as a Ruby Array, memoized after the first
  # call. getHeader(true) is provided by the SuperCSV superclass.
  #---------------------------------------------------------------------------------------

  def headers
    @headers ||= getHeader(true).to_a
  end

  #---------------------------------------------------------------------------------------
  # This method uses variable @processed_columns that should be initialized in the class
  # that includes this module. In the case of a list_reader for instance, processed_columns
  # is initialized as an Array. For map_reader, processed_columns is initialized as a
  # Hash. So, processed_columns is a data structure for storing the data processed. The
  # mapping defines where the data should be stored in this data structure. In the case
  # of list_reader, mapping[i] = i, for map_reader, mapping[i] = <mapping name for hash>
  #
  # @param processors [Array] SuperCSV CellProcessor instances, one per column
  #   (entries may be nil; short arrays leave trailing columns unprocessed)
  # @return the filled @processed_columns container
  # @raise [Jcsv::ContraintViolation] when a cell violates a processor constraint
  #---------------------------------------------------------------------------------------

  def executeProcessors(processors)

    # Raw cell values of the current row, from the Java superclass.
    source = getColumns()

    # CsvContext carries line/row/column position for SuperCSV error reports.
    context = CsvContext.new(getLineNumber(), getRowNumber(), 1);
    context.setRowSource(source);

    # raise "The number of columns to be processed #{source.size} must match the number of
    # CellProcessors #{processors.length}" if (source.size != processors.length)

    # Rebuilt for every row: the values of the dimension columns.
    @key_array = Array.new

    source.each_with_index do |s, i|
      begin
        # is @column_mapping[i] ever nil? I don't think so... CHECK!!!
        # false/nil mapping: column was removed from reading — skip it.
        next if ((@column_mapping[i] == false) || (@column_mapping[i].nil?))
        # if column mapping is 'true', then this column is a dimension and the data in this
        # column is part of the key
        if (@column_mapping[i] == true)
          begin
            # Registering the value with the dimension may raise (e.g. out of
            # order / repeated labels); that is reported but not fatal.
            @dimensions[@headers[i]] = s
          rescue RuntimeError => e
            puts "Warning reading row: #{source.toString()} in field '#{@headers[i]}'. " +
              e.message if !@suppress_warnings
            # raise "Error reading row: #{source.toString()} in field '#{@headers[i]}'. " +
            # e.message
          end
          # Slot the value at the dimension's declared position in the key.
          @key_array[@dimensions.dimensions_names.index(@headers[i])] = s
          next
        end

        # Column numbers in SuperCSV contexts are 1-based.
        context.setColumnNumber(i + 1)
        if (i >= processors.size)
          # No processor declared for this column: store the raw value.
          @processed_columns[@column_mapping[i]] = s
        else
          if (processors[i] == nil)
            # Explicit nil processor: store the raw value unfiltered.
            @processed_columns[@column_mapping[i]] = s
          else
            # Run the SuperCSV cell processor chain on the raw value.
            cell = processors[i].execute(s, context)
            # cell = (cell.is_a? Jcsv::Pack)? cell.ruby_obj : cell
            @processed_columns[@column_mapping[i]] = cell
          end
        end
      rescue SuperCsvConstraintViolationException => e
        # NOTE(review): class name is spelled 'ContraintViolation' (sic) —
        # presumably defined with the same spelling elsewhere; confirm.
        raise Jcsv::ContraintViolation.new("Constraint violation: #{context.toString}")
      end

    end

    @processed_columns

  end

end
|
138
|
+
|
139
|
+
#========================================================================================
|
140
|
+
# Class CLR (CSV List Reader) wraps java CsvListReader.
|
141
|
+
#========================================================================================
|
142
|
+
|
143
|
+
# Class CLR (CSV List Reader) wraps the Java SuperCSV CsvListReader and
# mixes in Processors so each row can be run through cell processors.
class CLR < org.supercsv.io.CsvListReader
  include_package "org.supercsv.cellprocessor.ift"
  include Processors

  #---------------------------------------------------------------------------------------
  # @param filereader       stream/reader handed straight to the Java superclass
  # @param preferences      SuperCSV CsvPreference for the underlying reader
  # @param dimensions       dimensions object, or nil when no dimensions are used
  # @param suppress_warnings when true, Processors suppresses dimension warnings
  #
  # NOTE(review): the optional parameter (dimensions = nil) sits before a
  # required one, so a 3-argument call binds the last argument to
  # suppress_warnings and leaves dimensions nil — confirm call sites rely
  # on this.
  #---------------------------------------------------------------------------------------

  def initialize(filereader, preferences, dimensions = nil, suppress_warnings)
    @dimensions = dimensions
    @suppress_warnings = suppress_warnings
    super(filereader, preferences)
  end

  #---------------------------------------------------------------------------------------
  # Reads the next row as a list.
  #
  # @param column_mapping [Mapping] index -> target mapping (see Mapping)
  # @param filters        false for no filtering, otherwise a hash whose
  #                       values are CellProcessor instances
  # @return the row data as an Array (with the dimension key prepended when
  #         dimensions are in use), or nil/falsey at end of input
  #---------------------------------------------------------------------------------------

  def read(column_mapping, filters)

    # initialize @processed_columns to a new Array. This will be used by method
    # executeProcessor from module Processors. @column_mapping also needs to be initialized
    # to the column_mapping received. Used by methods in module Processors
    @processed_columns = Array.new
    @column_mapping = column_mapping

    # With no filters, delegate to the Java reader with an empty processor
    # array; otherwise pass the filter hash's processors, converted to a
    # Java CellProcessor[].
    data_read = (filters == false)? super([].to_java(CellProcessor)) :
      super(filters.values.to_java(CellProcessor))
    # When dimensions are in play, the row's key (built by executeProcessors)
    # is prepended to the returned row.
    data_read.unshift(@key_array) if dimensions && data_read
    data_read
  end

end
|
176
|
+
|
177
|
+
#========================================================================================
|
178
|
+
# class CMR (CSV Map Reader) wraps class CsvMapReader
|
179
|
+
#========================================================================================
|
180
|
+
|
181
|
+
# Class CMR (CSV Map Reader) wraps the Java SuperCSV CsvMapReader and mixes
# in Processors so each row can be run through cell processors.
class CMR < org.supercsv.io.CsvMapReader
  include_package "org.supercsv.cellprocessor.ift"
  include Processors

  # When dimensions are defined, then the composition of all dimensions is the 'key'
  # attr_reader :key

  #---------------------------------------------------------------------------------------
  # @param filereader       stream/reader handed straight to the Java superclass
  # @param preferences      SuperCSV CsvPreference for the underlying reader
  # @param dimensions       dimensions object, or nil when no dimensions are used
  # @param suppress_warnings when true, Processors suppresses dimension warnings
  #
  # NOTE(review): as in CLR, the optional parameter precedes a required one;
  # a 3-argument call leaves dimensions nil.
  #---------------------------------------------------------------------------------------

  def initialize(filereader, preferences, dimensions = nil, suppress_warnings)
    @dimensions = dimensions
    @suppress_warnings = suppress_warnings
    super(filereader, preferences)
  end

  #---------------------------------------------------------------------------------------
  # Reads the next row as a hash keyed by the mapped column names.
  #
  # @param column_mapping [Mapping] column -> target mapping (see Mapping)
  # @param filters        false for no filtering, otherwise a hash whose
  #                       values are CellProcessor instances
  # @return a Hash for the row, or nil at end of input
  #---------------------------------------------------------------------------------------

  def read(column_mapping, filters)

    # initialize @processed_columns to a new Hash. This will be used by method
    # executeProcessor from module Processors
    @processed_columns = Hash.new
    @column_mapping = column_mapping

    # No filters: let the Java CsvMapReader build the hash from the mapped
    # names directly. With filters: read the raw row here and run the
    # processors ourselves via filter_input.
    (filters == false)? super(*column_mapping.mapping) :
      filter_input(column_mapping, filters.values.to_java(CellProcessor))

  end

  #---------------------------------------------------------------------------------------
  # Reads one raw row (readRow comes from the Java superclass) and runs the
  # cell processors over it.
  #
  # @param name_mapping unused here beyond documentation; executeProcessors
  #   reads @column_mapping set by #read
  # @param processors   Java CellProcessor[] to apply per column
  # @return the processed row hash (with the dimension key under :key when
  #   dimensions are in use), or nil when there is no more input
  #---------------------------------------------------------------------------------------

  def filter_input(name_mapping, processors)

    if (readRow())
      processed_columns = executeProcessors(processors)
      processed_columns[:key] = @key_array if dimensions
      return processed_columns
    end

  end

end
|
229
|
+
|
230
|
+
end
|
231
|
+
|
@@ -0,0 +1,37 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
|
3
|
+
##########################################################################################
|
4
|
+
# Copyright © 2013 Rodrigo Botafogo. All Rights Reserved. Permission to use, copy, modify,
|
5
|
+
# and distribute this software and its documentation, without fee and without a signed
|
6
|
+
# licensing agreement, is hereby granted, provided that the above copyright notice, this
|
7
|
+
# paragraph and the following two paragraphs appear in all copies, modifications, and
|
8
|
+
# distributions.
|
9
|
+
#
|
10
|
+
# IN NO EVENT SHALL RODRIGO BOTAFOGO BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL,
|
11
|
+
# INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF
|
12
|
+
# THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF RODRIGO BOTAFOGO HAS BEEN ADVISED OF THE
|
13
|
+
# POSSIBILITY OF SUCH DAMAGE.
|
14
|
+
#
|
15
|
+
# RODRIGO BOTAFOGO SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
|
16
|
+
# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE
|
17
|
+
# SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED HEREUNDER IS PROVIDED "AS IS".
|
18
|
+
# RODRIGO BOTAFOGO HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS,
|
19
|
+
# OR MODIFICATIONS.
|
20
|
+
##########################################################################################
|
21
|
+
|
22
|
+
require 'rubygems'
|
23
|
+
require "test/unit"
|
24
|
+
require 'shoulda'
|
25
|
+
|
26
|
+
require_relative '../config'
|
27
|
+
require 'jcsv'
|
28
|
+
|
29
|
+
require_relative 'test_customer_list'
|
30
|
+
require_relative 'test_customer_map'
|
31
|
+
require_relative 'test_dimensions'
|
32
|
+
require_relative 'test_deep_map'
|
33
|
+
require_relative 'test_critbit'
|
34
|
+
require_relative 'test_filters'
|
35
|
+
require_relative 'test_list_dimensions'
|
36
|
+
require_relative 'test_customer_nhlist'
|
37
|
+
require_relative 'test_mdarray'
|
@@ -0,0 +1,442 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
|
3
|
+
##########################################################################################
|
4
|
+
# Copyright © 2015 Rodrigo Botafogo. All Rights Reserved. Permission to use, copy, modify,
|
5
|
+
# and distribute this software and its documentation for educational, research, and
|
6
|
+
# not-for-profit purposes, without fee and without a signed licensing agreement, is hereby
|
7
|
+
# granted, provided that the above copyright notice, this paragraph and the following two
|
8
|
+
# paragraphs appear in all copies, modifications, and distributions. Contact Rodrigo
|
9
|
+
# Botafogo - rodrigo.a.botafogo@gmail.com for commercial licensing opportunities.
|
10
|
+
#
|
11
|
+
# IN NO EVENT SHALL RODRIGO BOTAFOGO BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL,
|
12
|
+
# INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF
|
13
|
+
# THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF RODRIGO BOTAFOGO HAS BEEN ADVISED OF THE
|
14
|
+
# POSSIBILITY OF SUCH DAMAGE.
|
15
|
+
#
|
16
|
+
# RODRIGO BOTAFOGO SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
|
17
|
+
# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE
|
18
|
+
# SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED HEREUNDER IS PROVIDED "AS IS".
|
19
|
+
# RODRIGO BOTAFOGO HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS,
|
20
|
+
# OR MODIFICATIONS.
|
21
|
+
##########################################################################################
|
22
|
+
|
23
|
+
require 'rubygems'
|
24
|
+
require 'test/unit'
|
25
|
+
require 'shoulda'
|
26
|
+
require 'matrix'
|
27
|
+
|
28
|
+
require 'pp'
|
29
|
+
require_relative '../config'
|
30
|
+
|
31
|
+
require 'jcsv'
|
32
|
+
|
33
|
+
# Test suite for the :critbit reader format: reading multi-dimensional CSV
# fixtures (../data/customer.csv, ../data/epilepsy.csv) into critbit maps,
# with and without chunking, deep maps, filters and column mappings.
class CSVTest < Test::Unit::TestCase

  context "CSV test" do

    # No shared fixtures; every test builds its own reader.
    setup do

    end

    #-------------------------------------------------------------------------------------
    # When reading the CSV file in one big chunk and selecting deep_map: true, then each
    # dimension will be hashed across all rows. [This is not clear at all!!! IMPROVE.]
    #-------------------------------------------------------------------------------------

    should "parse multi-dimension csv into a critbit, alphabetical order" do

      reader = Jcsv.reader("../data/customer.csv", format: :critbit,
                           dimensions: [:last_name, :first_name])

      # Keys are "<last_name>.<first_name>"; a critbit keeps them sorted.
      customers = reader.read
      assert_equal("Down.Bob", customers.keys[0])
      assert_equal("Dunbar.John", customers.keys[1])

      # Reversing the dimension order changes the key composition and hence
      # the alphabetical ordering of the keys.
      reader = Jcsv.reader("../data/customer.csv", format: :critbit,
                           dimensions: [:first_name, :last_name])

      customers = reader.read
      assert_equal("Alice.Wunderland", customers.keys[0])
      assert_equal("Bill.Jobs", customers.keys[1])

    end

    #-------------------------------------------------------------------------------------
    # Read data into a flat map. Allows random access to the data by use of the map
    # 'key'. The 'key' is a string that concatenates the values of the dimensions's
    # labels with a '.'.
    #-------------------------------------------------------------------------------------

    should "read data into flat critbit" do

      reader = Jcsv.reader("../data/epilepsy.csv", format: :critbit,
                           dimensions: [:treatment, :subject, :period],
                           default_filter: Jcsv.int)

      # remove the :patient field from the data, as this field is already given by the
      # :subject field.
      reader.mapping = {:patient => false}

      # read all the data into a flat map (hash) with keys the dimensions values
      # concatenated with '.'.
      treatment = reader.read
      # p treatment

      assert_equal(11, treatment["placebo.1.1"][:base])
      assert_equal(31, treatment["placebo.1.1"][:age])
      assert_equal(5, treatment["placebo.1.1"][:"seizure.rate"])

      assert_equal(31, treatment["Progabide.35.2"][:base])
      assert_equal(30, treatment["Progabide.35.2"][:age])
      assert_equal(17, treatment["Progabide.35.2"][:"seizure.rate"])

    end

    #-------------------------------------------------------------------------------------
    # Read data into a flat map in chunks
    #-------------------------------------------------------------------------------------

    should "read data into flat critbit in chunks" do

      # parameter deep_map: is not passed. By default it is false
      reader = Jcsv.reader("../data/epilepsy.csv", format: :critbit, chunk_size: 20,
                           dimensions: [:treatment, :subject, :period],
                           default_filter: Jcsv.int)

      # remove the :patient field from the data, as this field is already given by the
      # :subject field.
      reader.mapping = {:patient => false}
      treatment = reader.read
      # p treatment

      treatment_type = reader.dimensions[:treatment]
      subject = reader.dimensions[:subject]
      period = reader.dimensions[:period]

      # variable labels has all dimension labels
      assert_equal(0, treatment_type.labels["placebo"])
      assert_equal(1, treatment_type.labels["Progabide"])
      assert_equal(1, subject.labels["2"])
      assert_equal(13, subject.labels["14"])
      assert_equal(58, subject.labels["59"])
      assert_equal(0, period.labels["1"])
      assert_equal(3, period.labels["4"])

      # we now need to access the first chunk [0] to get to the desired element
      assert_equal(11, treatment[0]["placebo.1.1"][:base])
      assert_equal(31, treatment[0]["placebo.1.1"][:age])
      assert_equal(5, treatment[0]["placebo.1.1"][:"seizure.rate"])

      # chunk [0] does not have key "Progabide.35.2"
      assert_equal(nil, treatment[0]["Progabide.35.2"])

      assert_equal(10, treatment[6]["Progabide.32.3"][:base])
      assert_equal(30, treatment[6]["Progabide.32.3"][:age])
      assert_equal(1, treatment[6]["Progabide.32.3"][:"seizure.rate"])

    end

    #-------------------------------------------------------------------------------------
    # Chunked reading through the enumerator returned by #each.
    #-------------------------------------------------------------------------------------

    should "read to critbit in enumerable chunks" do

      # parameter deep_map: is not passed. By default it is false
      reader = Jcsv.reader("../data/epilepsy.csv", format: :critbit, chunk_size: 20,
                           dimensions: [:treatment, :subject, :period],
                           default_filter: Jcsv.int)

      # Method each without a block returns an enumerator
      enum = reader.each

      # read the first chunk. Chunk is of size 20
      chunk = enum.next
      # chunk layout appears to be [line_no, row_no, data] — data is at [2].
      data = chunk[2]

      # in this case, only the first 20 rows were read, so only one treatment and six
      # subjects were read until this point
      assert_equal(1, reader.dimensions[:treatment].size)
      # assert_equal(6, reader.dimensions[:subject].size)

      assert_equal(8, data["placebo.4.4"][:base])
      assert_equal(36, data["placebo.4.4"][:age])
      assert_equal(4, data["placebo.4.4"][:"seizure.rate"])

      # read the next chunk. Chunk is of size 20
      chunk = enum.next

      # read the next chunk... not interested in the second chunk for some reason...
      chunk = enum.next
      data = chunk[2]

      # As we read new chunks of data, the dimensions labels accumulate, i.e., they are
      # not erased between reads of every chunk (call to the next function). Dimensions
      # are variables from the reader and not the chunk.
      assert_equal(1, reader.dimensions[:treatment].size)
      assert_equal(16, reader.dimensions[:subject].size)

      assert_equal(33, data["placebo.12.2"][:base])
      assert_equal(24, data["placebo.12.2"][:age])
      assert_equal(6, data["placebo.12.2"][:"seizure.rate"])

    end

    #-------------------------------------------------------------------------------------
    # Block form of #read: one row per block call when chunk_size is not set.
    #-------------------------------------------------------------------------------------

    should "read to critbit and pass to block with dimensions" do

      reader = Jcsv.reader("../data/epilepsy.csv", format: :critbit,
                           dimensions: [:treatment, :subject, :period],
                           default_filter: Jcsv.int)

      # Without a chunk_size each yielded row holds exactly one key.
      reader.read do |line_no, row_no, row|
        assert_equal(1, row.keys.size)
      end

    end

    #-------------------------------------------------------------------------------------
    # Block form of #read with chunk_size 20: each yielded chunk holds 20 keys
    # (except possibly the final, short chunk).
    #-------------------------------------------------------------------------------------

    should "read to critbit and pass to block with dimensions, chunk_size > 1" do

      reader = Jcsv.reader("../data/epilepsy.csv", format: :critbit, chunk_size: 20,
                           dimensions: [:treatment, :subject, :period],
                           default_filter: Jcsv.int)

      # line_no < 230 skips the last (possibly partial) chunk of the fixture.
      reader.read do |line_no, row_no, row|
        assert_equal(20, row.keys.size) if line_no < 230
      end

    end

    #-------------------------------------------------------------------------------------
    # Mapping a column to true (i.e. promoting it to a dimension after the
    # dimensions were declared) is invalid.
    #-------------------------------------------------------------------------------------

    should "raise error if mapping a column to true in critbit" do

      reader = Jcsv.reader("../data/epilepsy.csv", format: :critbit, chunk_size: :all,
                           dimensions: [:subject, :period],
                           default_filter: Jcsv.int)

      # Raises an error, since mapping to true is not defined
      assert_raise ( ArgumentError ) { reader.mapping =
        {:treatment => false, :patient => true} }

    end

    #-------------------------------------------------------------------------------------
    # When reading the CSV file in one big chunk and selecting deep_map: true, then each
    # dimension will be hashed across all rows.
    #-------------------------------------------------------------------------------------

    should "parse multi-dimension csv file to critbit, chuk_size all and deep_map true" do

      reader = Jcsv.reader("../data/epilepsy.csv", format: :critbit, chunk_size: :all,
                           dimensions: [:treatment, :subject, :period], deep_map: true)

      # remove the :patient field from the data, as this field is already given by the
      # :subject field.
      reader.mapping = {:patient => false}

      # since we are reading with chunk_size = :all, then we will only get one chunk back.
      # Then we can get the first chunk by indexing read with 0: reader.read[0]
      treatment = reader.read[0]
      # p treatment

      # get the dimensions
      treatment_type = reader.dimensions[:treatment]
      subject = reader.dimensions[:subject]
      period = reader.dimensions[:period]

      # variable labels has all dimension labels
      assert_equal(0, treatment_type.labels["placebo"])
      assert_equal(1, treatment_type.labels["Progabide"])
      assert_equal(1, subject.labels["2"])
      assert_equal(13, subject.labels["14"])
      assert_equal(58, subject.labels["59"])
      assert_equal(0, period.labels["1"])
      assert_equal(3, period.labels["4"])

      # No default_filter here, so values stay as strings.
      assert_equal("14", treatment["placebo"]["10"]["1"][:"seizure.rate"])

    end

    #-------------------------------------------------------------------------------------
    # Per-column filters override the default filter (seizure.rate read as float).
    #-------------------------------------------------------------------------------------

    should "read data with dimensions, mapping and filters into a critbit" do

      reader = Jcsv.reader("../data/epilepsy.csv", format: :critbit, chunk_size: :all,
                           dimensions: [:treatment, :subject, :period], deep_map: true,
                           default_filter: Jcsv.int)

      # remove the :patient field from the data, as this field is already given by the
      # :subject field.
      reader.mapping = {:patient => false}
      reader.filters = {:"seizure.rate" => Jcsv.float}

      # chunk_size :all yields a single chunk at index 0.
      treatment = reader.read[0]
      # p treatment

      assert_equal(14.0, treatment["placebo"]["10"]["1"][:"seizure.rate"])
      assert_equal(19.0, treatment["Progabide"]["45"]["1"][:"seizure.rate"])

    end

    #-------------------------------------------------------------------------------------
    # deep_map with a finite chunk_size: each chunk gets its own deep map, so a
    # dimension value may be absent from early chunks.
    #-------------------------------------------------------------------------------------

    should "read data with deep_map in critbit but chunk_size not all" do

      reader = Jcsv.reader("../data/epilepsy.csv", format: :critbit, chunk_size: 20,
                           dimensions: [:treatment, :subject, :period], deep_map: true,
                           default_filter: Jcsv.int)

      # remove the :patient field from the data, as this field is already given by the
      # :subject field.
      reader.mapping = {:patient => false}
      reader.filters = {:"seizure.rate" => Jcsv.float}

      treatment = reader.read

      assert_equal(3.0, treatment[0]["placebo"]["2"]["1"][:"seizure.rate"])
      # since only 20 rows read per chunk, there is no Progabide row yet. Note that there
      # was data in the test above
      assert_equal(nil, treatment[0]["Progabide"])

      # chunk 10, has Progabide as a dimension
      assert_equal(6.0, treatment[10]["Progabide"]["51"]["2"][:"seizure.rate"])

    end

    #-------------------------------------------------------------------------------------
    # Using only :period as dimension makes keys repeat across subjects.
    #-------------------------------------------------------------------------------------

    should "raise exception if key is repeated in critbit" do

      reader = Jcsv.reader("../data/epilepsy.csv", format: :critbit, chunk_size: :all,
                           dimensions: [:period], deep_map: true)

      # will raise an exception as :period is not a key. Will break as soon as we read the
      # first period for the second user
      assert_raise ( Jcsv::DuplicateKeyError ) { reader.read[0] }

    end

    #-------------------------------------------------------------------------------------
    # Dimensions declared out of their natural order should produce warning
    # messages on read (visually inspected, not asserted).
    #-------------------------------------------------------------------------------------

    should "Show errors when dimensions are not in order or missing in critbit" do

      reader = Jcsv.reader("../data/epilepsy.csv", format: :critbit, chunk_size: :all,
                           dimensions: [:period, :treatment, :subject], deep_map: true)

      p "LOTS OF ERROR MESSAGES EXPECTED FROM HERE..."

      # remove the :patient field from the data, as this field is already given by the
      # :subject field.
      reader.mapping = {:patient => false}

      # since we are reading with chunk_size = :all, then we will only get one chunk back.
      # Then we can get the first chunk by indexing read with 0: reader.read[0]
      treatment = reader.read[0]

      p "... TO HERE. If no error messages, then something is wrong!"

    end

    #-------------------------------------------------------------------------------------
    # Same scenario as above, but suppress_warnings: true must silence the
    # warning output (visually inspected, not asserted).
    #-------------------------------------------------------------------------------------

    should "Suppress warnings when dimensions are not in order or missing in critbit" do

      reader = Jcsv.reader("../data/epilepsy.csv", format: :critbit, chunk_size: :all,
                           dimensions: [:period, :treatment, :subject], deep_map: true,
                           suppress_warnings: true)

      p "No warning messages should be seen from here..."

      # remove the :patient field from the data, as this field is already given by the
      # :subject field.
      reader.mapping = {:patient => false}

      treatment = reader.read
      # p treatment

      p "... to here. If there are any warning messages then there is something wrong!"

    end

    #-------------------------------------------------------------------------------------
    # There is a large difference when parsing multidimensional CSV files with chunks and
    # no chunks. When no chunks are selected, this is identical to normal dimension
    # reading.
    #-------------------------------------------------------------------------------------

    should "parse multi-dimension csv file to critbit no chunk" do

      reader = Jcsv.reader("../data/epilepsy.csv", format: :critbit,
                           dimensions: [:treatment, :subject, :period], deep_map: true)

      # remove the :patient field from the data, as this field is already given by the
      # :subject field.
      reader.mapping = {:patient => false}

      # No chunk_size given here, so #read returns a flat map keyed by the
      # concatenated dimension values; no default_filter, so values stay
      # strings.
      treatment = reader.read
      # p treatment

      assert_equal("11", treatment["placebo.1.1"][:base])
      assert_equal("31", treatment["placebo.1.1"][:age])
      assert_equal("5", treatment["placebo.1.1"][:"seizure.rate"])

      assert_equal("11", treatment["placebo.1.2"][:base])
      assert_equal("31", treatment["placebo.1.2"][:age])
      assert_equal("3", treatment["placebo.1.2"][:"seizure.rate"])

    end

    #-------------------------------------------------------------------------------------
    # All examples until now had chunk_size :all, but they can have smaller size. In this
    # example, chunk_size is 20 and it is processed by a block
    #-------------------------------------------------------------------------------------

    should "read with dimension and given a block in critbit" do

      reader = Jcsv.reader("../data/epilepsy.csv", format: :critbit, chunk_size: 20,
                           dimensions: [:treatment, :subject, :period], deep_map: true,
                           default_filter: Jcsv.int)

      reader.mapping = {:patient => false}

      # Smoke test: just print each chunk and its position; no assertions.
      reader.read do |line_no, row_no, chunk|
        p line_no
        p row_no
        p chunk
      end

    end

  end

end