mdarray-jcsv 0.6.3-java

Sign up to get free protection for your applications and to get access to all the features.
Files changed (44) hide show
  1. checksums.yaml +7 -0
  2. data/LICENSE.txt +23 -0
  3. data/README.md +2 -0
  4. data/Rakefile +46 -0
  5. data/config.rb +104 -0
  6. data/lib/constraints.rb +205 -0
  7. data/lib/date_filters.rb +252 -0
  8. data/lib/dimensions.rb +276 -0
  9. data/lib/filters.rb +332 -0
  10. data/lib/jcsv.rb +107 -0
  11. data/lib/list_reader.rb +200 -0
  12. data/lib/locale.rb +192 -0
  13. data/lib/map_reader.rb +192 -0
  14. data/lib/mdarray-jcsv.rb +24 -0
  15. data/lib/mdarray_reader.rb +110 -0
  16. data/lib/numeric_filters.rb +225 -0
  17. data/lib/reader.rb +547 -0
  18. data/lib/supercsv_interface.rb +231 -0
  19. data/test/test_complete.rb +37 -0
  20. data/test/test_critbit.rb +442 -0
  21. data/test/test_customer_list.rb +436 -0
  22. data/test/test_customer_map.rb +209 -0
  23. data/test/test_customer_nhlist.rb +161 -0
  24. data/test/test_deep_map.rb +264 -0
  25. data/test/test_del.rb +73 -0
  26. data/test/test_dimensions.rb +231 -0
  27. data/test/test_example.rb +79 -0
  28. data/test/test_filters.rb +374 -0
  29. data/test/test_list_dimensions.rb +110 -0
  30. data/test/test_mdarray.rb +227 -0
  31. data/test/test_missing_data.rb +57 -0
  32. data/vendor/commons-beanutils-1.8.3.jar +0 -0
  33. data/vendor/commons-lang3-3.1.jar +0 -0
  34. data/vendor/dozer-5.4.0.jar +0 -0
  35. data/vendor/jcl-over-slf4j-1.6.6.jar +0 -0
  36. data/vendor/joda-time-2.7.jar +0 -0
  37. data/vendor/slf4j-api-1.7.5.jar +0 -0
  38. data/vendor/snakeyaml-1.14.jar +0 -0
  39. data/vendor/super-csv-2.4.0.jar +0 -0
  40. data/vendor/super-csv-dozer-2.4.0.jar +0 -0
  41. data/vendor/super-csv-java8-2.4.0.jar +0 -0
  42. data/vendor/super-csv-joda-2.4.0.jar +0 -0
  43. data/version.rb +2 -0
  44. metadata +196 -0
@@ -0,0 +1,276 @@
1
+ # -*- coding: utf-8 -*-
2
+
3
+ ##########################################################################################
4
+ # @author Rodrigo Botafogo
5
+ #
6
+ # Copyright © 2015 Rodrigo Botafogo. All Rights Reserved. Permission to use, copy, modify,
7
+ # and distribute this software and its documentation, without fee and without a signed
8
+ # licensing agreement, is hereby granted, provided that the above copyright notice, this
9
+ # paragraph and the following two paragraphs appear in all copies, modifications, and
10
+ # distributions.
11
+ #
12
+ # IN NO EVENT SHALL RODRIGO BOTAFOGO BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL,
13
+ # INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF
14
+ # THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF RODRIGO BOTAFOGO HAS BEEN ADVISED OF THE
15
+ # POSSIBILITY OF SUCH DAMAGE.
16
+ #
17
+ # RODRIGO BOTAFOGO SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
18
+ # THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE
19
+ # SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED HEREUNDER IS PROVIDED "AS IS".
20
+ # RODRIGO BOTAFOGO HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS,
21
+ # OR MODIFICATIONS.
22
+ ##########################################################################################
23
+
24
+ class Jcsv
25
+
26
+ private
27
+
28
+ ##########################################################################################
29
+ # Class Dimension keeps track of all data dimensions in a CSV file. A data dimension is
30
+ # similar to a mathematical dimension such as x, y or z. In principle, every data should
31
+ # be associated with only one set of data dimensions. For example, let's say that our
32
+ # data has an employee ID, then column ID defines a dimension on the data, since every
33
+ # employee has only one ID and every ID is associated with only one employee. As another
34
+ # example, let's say that we have data about a medical experiment that was done with a
35
+ # set of patients for 4 weeks, which were given either a medicine or a placebo. The
36
+ # data could have columns labeled: "Patient Index", "Week", "Type of Medicine", "Blood
37
+ # Sample". Some entries would be:
38
+ #
39
+ # "Patient Index" "Week" "Type of Medicine" "Blood Sample"
40
+ # 1 1 Placebo xxxx
41
+ # 1 2 Placebo xxxx
42
+ # 2 1 med1 xxxx
43
+ # 2 2 med1 xxxx
44
+ #
45
+ # "Patient Index", "Week", "Type of Medicine" are three dimensions of this data and taken
46
+ # together unequivocally define the data, i.e., those dimensions are similar to a DB key.
47
+ # Since this is a key, there should be no other line of data with the same values in
48
+ # the dimensions.
49
+ #
50
+ # CSV files are not ideal for maintaining dimensions, so, in order to read dimensions
51
+ # in a CSV file, there is the need for some rules.
52
+ ##########################################################################################
53
+
54
# Tracks the distinct labels read for one dimension column of a CSV file, together
# with the order in which those labels are expected to keep appearing.
#
# Labels are numbered from 0 in order of first appearance. The first time the labels
# wrap around to the label with index 0 the dimension becomes frozen: from then on no
# new label is accepted and labels must repeat in the order they were first read.
class Dimension

  # Sentinel meaning "no label given" for #index; nil cannot be used because nil is a
  # valid hash key and therefore a possible label.
  NO_LABEL = Object.new.freeze
  private_constant :NO_LABEL

  attr_reader :name           # dimension (column) name
  attr_reader :frozen         # true once the labels have wrapped around to index 0
  attr_reader :current_value  # index of the label read last (nil before any label)
  attr_reader :next_value     # index the next *different* label is expected to have
  attr_reader :labels         # Hash label => index
  attr_writer :index          # column index of this dimension in the csv file

  #------------------------------------------------------------------------------------
  # dim_name is the dimension name.
  #------------------------------------------------------------------------------------

  def initialize(dim_name)
    @name = dim_name
    @frozen = false
    @current_value = nil
    @next_value = 0
    @max_value = 0
    @labels = {}
    @index = nil
  end

  #------------------------------------------------------------------------------------
  # Number of distinct labels registered so far.
  #------------------------------------------------------------------------------------

  def size
    @labels.size
  end

  alias length size

  #------------------------------------------------------------------------------------
  # Registers +label+ as the label of the line just read.
  #
  # * An unknown label is stored with the next free index. If the dimension is already
  #   frozen a RuntimeError is raised instead, *before* anything is stored, so the
  #   label table and the read cursor stay consistent for callers that rescue.
  # * A known label equal to the one read last is accepted as is.
  # * A known label carrying the expected next index advances the cursor (after the
  #   dimension is frozen the cursor wraps around modulo the number of labels).
  # * A known label with index 0 read after a higher index, while not yet frozen,
  #   freezes the dimension (see #reset).
  # * Any other known label is out of order: the cursor is moved past it and a
  #   RuntimeError ("Missing data: ...") is raised. The caller may rescue it and keep
  #   reading.
  #
  # Returns true only when this call wrapped around to the first label and froze the
  # dimension; false otherwise.
  #------------------------------------------------------------------------------------

  def add_label(label)
    unless @labels.key?(label)
      # Refuse new labels on a frozen dimension before touching any state: storing it
      # first would give it an index that collides with an existing label.
      raise "Dimension '#{@name}' is frozen when adding label '#{label}'." if @frozen

      @current_value = @labels[label] = @next_value
      @next_value += 1
      return false
    end

    position = @labels[label]

    if position == @current_value
      # Just read one more line with the same label. No problem, keep reading.
    elsif position == @next_value
      # Reading the next label.
      @current_value = @next_value
      @next_value = (@next_value + 1) % (@max_value + 1)
    elsif position < @current_value && position == 0
      # Reached the last label and going back to the first one.
      reset
      return true
    else
      # Label read is out of order. Expected value is either 0 (starting over) or the
      # next value. The state is updated before raising so that a caller that rescues
      # the exception can let the program still run.
      expected_value = position < @current_value ? 0 : @next_value
      reset if position < @current_value
      @current_value = position + 1
      @next_value = @current_value + 1
      raise "Missing data: next expected label was '#{@labels.key(expected_value)}' but read '#{label}'."
    end

    false
  end

  #------------------------------------------------------------------------------------
  # Freezes the dimension the first time it is called: remembers the highest index
  # reached and rewinds the cursor to the first label. Later calls do nothing.
  #------------------------------------------------------------------------------------

  def reset
    return if @frozen

    @frozen = true
    @max_value = @current_value
    @current_value = 0
    @next_value = 1
  end

  #------------------------------------------------------------------------------------
  # With a label: the index assigned to that label (nil when the label is unknown).
  # Without argument: the column index previously stored through #index=.
  #------------------------------------------------------------------------------------

  def index(label = NO_LABEL)
    return @index if NO_LABEL.equal?(label)

    @labels[label]
  end

  #------------------------------------------------------------------------------------
  # Shortcut for index(label).
  #------------------------------------------------------------------------------------

  def [](label)
    index(label)
  end

end
163
+
164
+ ##########################################################################################
165
+ #
166
+ ##########################################################################################
167
+
168
# Ordered collection of Dimension objects, one per dimension column name.
class Dimensions

  attr_reader :dimensions_names, :dimensions, :rank

  #------------------------------------------------------------------------------------
  # dimensions_names is an array of column names that will be used as dimensions. One
  # Dimension is created per name; rank is the number of names.
  #------------------------------------------------------------------------------------

  def initialize(dimensions_names)
    @dimensions_names = dimensions_names
    @rank = @dimensions_names.size
    @dimensions = {}
    @dimensions_names.each { |column| @dimensions[column] = Dimension.new(column) }
  end

  #------------------------------------------------------------------------------------
  # Number of labels registered for the dimension called dim_name.
  #------------------------------------------------------------------------------------

  def length(dim_name)
    labels(dim_name).size
  end

  alias size length

  #------------------------------------------------------------------------------------
  # Label table (label => index) of the dimension called dim_name.
  #------------------------------------------------------------------------------------

  def labels(dim_name)
    @dimensions[dim_name].labels
  end

  #------------------------------------------------------------------------------------
  # Array with the number of labels of every dimension, in dimensions_names order.
  #------------------------------------------------------------------------------------

  def shape
    @dimensions_names.map { |column| length(column) }
  end

  #------------------------------------------------------------------------------------
  # Adds label to the dimension called dim_name. When that dimension reports that it
  # wrapped around (its add_label returned true), reset is sent to it and to every
  # dimension listed after it in dimensions_names.
  #------------------------------------------------------------------------------------

  def add_label(dim_name, label)
    wrapped = @dimensions[dim_name].add_label(label)
    return nil unless wrapped

    @dimensions[dim_name].reset
    first = @dimensions_names.index(dim_name)
    (first...@dimensions_names.size).each do |position|
      @dimensions[@dimensions_names[position]].reset
    end
  end

  #------------------------------------------------------------------------------------
  # dims[dim_name] = label is the same as add_label(dim_name, label).
  #------------------------------------------------------------------------------------

  def []=(dim_name, label)
    add_label(dim_name, label)
  end

  #------------------------------------------------------------------------------------
  # The Dimension registered under name (nil when there is none).
  #------------------------------------------------------------------------------------

  def dimension(name)
    @dimensions[name]
  end

  #------------------------------------------------------------------------------------
  # Same lookup as dimension(name).
  #------------------------------------------------------------------------------------

  def [](name)
    @dimensions[name]
  end

  #------------------------------------------------------------------------------------
  # Yields every Dimension in dimensions_names order.
  #------------------------------------------------------------------------------------

  def each
    @dimensions_names.each { |column| yield @dimensions[column] }
  end

end
275
+
276
+ end
@@ -0,0 +1,332 @@
1
+ # -*- coding: utf-8 -*-
2
+
3
+ ##########################################################################################
4
+ # @author Rodrigo Botafogo
5
+ #
6
+ # Copyright © 2015 Rodrigo Botafogo. All Rights Reserved. Permission to use, copy, modify,
7
+ # and distribute this software and its documentation, without fee and without a signed
8
+ # licensing agreement, is hereby granted, provided that the above copyright notice, this
9
+ # paragraph and the following two paragraphs appear in all copies, modifications, and
10
+ # distributions.
11
+ #
12
+ # IN NO EVENT SHALL RODRIGO BOTAFOGO BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL,
13
+ # INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF
14
+ # THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF RODRIGO BOTAFOGO HAS BEEN ADVISED OF THE
15
+ # POSSIBILITY OF SUCH DAMAGE.
16
+ #
17
+ # RODRIGO BOTAFOGO SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
18
+ # THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE
19
+ # SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED HEREUNDER IS PROVIDED "AS IS".
20
+ # RODRIGO BOTAFOGO HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS,
21
+ # OR MODIFICATIONS.
22
+ ##########################################################################################
23
+
24
+ require 'bigdecimal'
25
+ require 'ipaddr'
26
+
27
+ require_relative 'locale'
28
+
29
+ class Jcsv
30
+
31
+ #========================================================================================
32
+ #
33
+ #========================================================================================
34
+
35
# Root of the Jcsv exception hierarchy; every error class below descends from it.
class Error < RuntimeError; end

# Error for a filter that cannot process a cell value.
class FilterError < Jcsv::Error; end

# Not raised in this file; by its name, signals a value breaking a constraint.
class ConstraintViolation < Jcsv::Error; end

# Not raised in this file; by its name, signals that required headers are absent.
class MissingHeadersError < Jcsv::Error; end

# Not raised in this file; by its name, signals a key that appears more than once.
class DuplicateKeyError < Jcsv::Error; end
54
+
55
+ #========================================================================================
56
+ #
57
+ #========================================================================================
58
+
59
# Mixin that turns filters into a singly linked chain: each filter knows the filter
# that runs after it, and the head of a chain also remembers the chain's tail so that
# appending does not have to walk the list.
module NextFilter

  # This object's next filter (nil when this is the end of the chain).
  attr_accessor :next_filter

  # Last filter of the chain that starts at this object. It is only maintained on the
  # object that received >> and is what makes appending possible.
  attr_accessor :last_filter

  #---------------------------------------------------------------------------------------
  # Appends next_filter, or the whole chain that starts at next_filter, to the end of
  # this chain and returns self so that calls can be strung together:
  #
  #   a >> b >> c
  #
  # Parenthesised sub-chains are supported in any position, e.g. a >> (b >> c) >> d
  # and a >> b >> (c >> d) >> e: the new tail is always the tail of the appended
  # sub-chain, not merely its first element.
  #---------------------------------------------------------------------------------------

  def >>(next_filter)
    # Tail of what is being appended: its own recorded tail when it already heads a
    # chain, otherwise the filter itself.
    appended_tail = next_filter.last_filter || next_filter

    if @next_filter.nil?
      @next_filter = next_filter
    else
      @last_filter.next_filter = next_filter
    end

    @last_filter = appended_tail
    self
  end

  #---------------------------------------------------------------------------------------
  # Hands value to the next filter's execute and returns its result, or returns value
  # unchanged when this is the last filter of the chain.
  #---------------------------------------------------------------------------------------

  def exec_next(value, context)
    @next_filter ? @next_filter.execute(value, context) : value
  end

end
97
+
98
+ #========================================================================================
99
+ #
100
+ #========================================================================================
101
+
102
# Base class of the Ruby-implemented filters in this file. It extends Super CSV's
# CellProcessorAdaptor, declares the Long, Double and String cell-processor interfaces,
# and mixes in NextFilter so instances can be chained with >>.
class Filter < org.supercsv.cellprocessor.CellProcessorAdaptor
  # Super CSV cell-processor interfaces this class declares.
  include org.supercsv.cellprocessor.ift.LongCellProcessor
  include org.supercsv.cellprocessor.ift.DoubleCellProcessor
  include org.supercsv.cellprocessor.ift.StringCellProcessor

  # Ruby-side chaining (>> and exec_next).
  include NextFilter
end
109
+
110
+ #========================================================================================
111
+ #
112
+ #========================================================================================
113
+
114
# Boolean-parsing filter built on Super CSV's ParseBool, chainable through NextFilter.
class RBParseBool < org.supercsv.cellprocessor.ParseBool
  include NextFilter

  # true_values / false_values: Ruby arrays of strings, converted to Java String
  # arrays before being handed to the ParseBool constructor.
  # ignore_case: passed to the ParseBool constructor as is.
  def initialize(true_values, false_values, ignore_case)
    super(true_values.to_java(:string), false_values.to_java(:string), ignore_case)
  end

  # Parses value with ParseBool#execute and passes the result down the filter chain.
  # A SuperCsvCellProcessorException is re-raised as a FilterError whose message also
  # carries the context.
  def execute(value, context)
    parsed = super(value, context)
    exec_next(parsed, context)
  rescue org.supercsv.exception.SuperCsvCellProcessorException => e
    raise FilterError, "#{e.message} in:\n #{context}"
  end

end
133
+
134
+ #========================================================================================
135
+ #
136
+ #========================================================================================
137
+
138
# Filter that substitutes a fixed replacement for missing (nil) cell values.
class RBConvertNilTo < Filter
  # Replacement handed down the chain when the cell value is nil.
  attr_reader :value

  # value: the replacement for nil cells.
  def initialize(value)
    @value = value
    super()
  end

  # Passes value down the chain, or the configured replacement when value is nil.
  # Only nil is replaced: a legitimate false (for instance the result of a preceding
  # boolean filter) is kept, which a plain truthiness test would overwrite.
  def execute(value, context)
    exec_next(value.nil? ? @value : value, context)
  end

end
152
+
153
+ #========================================================================================
154
+ #
155
+ #========================================================================================
156
+
157
# Filter that makes the rest of the chain optional: a missing value short-circuits
# the chain instead of being processed.
class RBOptional < Filter

  # Returns a falsy value (nil or false) untouched without running the following
  # filters; any other value is handed to the next filter.
  def execute(value, context)
    return value unless value

    exec_next(value, context)
  end

end
164
+
165
+ #========================================================================================
166
+ #
167
+ #========================================================================================
168
+
169
# Character-parsing filter built on Super CSV's ParseChar, chainable through
# NextFilter.
class RBParseChar < org.supercsv.cellprocessor.ParseChar
  include NextFilter

  # Uses the no-argument ParseChar constructor.
  def initialize
    super()
  end

  # Parses value with ParseChar#execute and passes the result down the filter chain.
  # A SuperCsvCellProcessorException is re-raised as a FilterError whose message also
  # carries the context.
  def execute(value, context)
    parsed = super(value, context)
    exec_next(parsed, context)
  rescue org.supercsv.exception.SuperCsvCellProcessorException => e
    raise FilterError, "#{e.message} in:\n #{context}"
  end

end
185
+
186
+ #========================================================================================
187
+ #
188
+ #========================================================================================
189
+
190
# Filter that records every value it sees while letting it pass through unchanged.
class RBCollector < Filter

  # Values seen so far, in the order they were processed.
  attr_reader :collection

  def initialize
    @collection = Array.new
    super()
  end

  # Runs the inherited validateInputNotNull check on value, appends value to the
  # collection and hands it, unchanged, to the next filter.
  def execute(value, context)
    validateInputNotNull(value, context)
    @collection.push(value)
    exec_next(value, context)
  end

end
206
+
207
+ #========================================================================================
208
+ #
209
+ #========================================================================================
210
+
211
# Filter that converts a cell value into an IPAddr.
class RBIPAddr < Filter

  # Runs the inherited validateInputNotNull check on value, builds an IPAddr from it
  # and hands that object to the next filter.
  def execute(value, context)
    validateInputNotNull(value, context)
    address = IPAddr.new(value)
    exec_next(address, context)
  end

end
220
+
221
+ #========================================================================================
222
+ #
223
+ #========================================================================================
224
+
225
# Filter whose behaviour is supplied by the caller as a block.
class RBDynamic < Filter

  # args:  optional extra arguments forwarded to the block on every call. They are
  #        accepted here because Jcsv.dynamic forwards its positional arguments to this
  #        constructor; without the splat any such argument raised ArgumentError.
  # block: callable invoked for each value (may be nil).
  def initialize(*args, block: nil)
    @args = args
    @block = block
    super()
  end

  # Replaces value by the result of calling the block with (value, context) followed
  # by the extra constructor arguments, then hands it to the next filter. Without a
  # block the value passes through unchanged.
  def execute(value, context)
    value = @block.call(value, context, *@args) if @block
    exec_next(value, context)
  end

end
238
+
239
+ #========================================================================================
240
+ #
241
+ #========================================================================================
242
+
243
# Filter that rewrites a cell value, either through String#gsub or through a
# caller-supplied block.
class RBGsub < Filter

  # args:  arguments for the substitution.
  # block: optional callable used instead of gsub.
  def initialize(*args, block: nil)
    @args = args
    @block = block
    super()
  end

  # With a block: the new value is the block called with value followed by the stored
  # args. Without a block: the new value is value.gsub(*args). The result goes to the
  # next filter.
  def execute(value, context)
    replaced =
      if @block
        @block.call(value, *@args)
      else
        value.gsub(*@args)
      end
    exec_next(replaced, context)
  end

end
257
+
258
+ #========================================================================================
259
+ #
260
+ #========================================================================================
261
+
262
# Filter that applies an arbitrary method, chosen by name, to the cell value.
class RBStringGeneric < Filter

  # function: name of the method to send to each value.
  # args:     arguments passed along with the call.
  # block:    optional block passed along with the call.
  def initialize(function, *args, block: nil)
    @function = function
    @args = args
    @block = block
    super()
  end

  # Sends the configured method, with the stored arguments and block, to value and
  # hands the result to the next filter.
  def execute(value, context)
    result = value.send(@function, *@args, &@block)
    exec_next(result, context)
  end

end
277
+
278
+ #========================================================================================
279
+ #
280
+ #========================================================================================
281
+
282
# Factory methods on Jcsv, one per filter class defined above.
class << self

  # Boolean parser. true_values / false_values list the accepted spellings;
  # ignore_case is passed on to the parser.
  def bool(true_values: %w[true 1 y t],
           false_values: %w[false n 0 f],
           ignore_case: true)
    RBParseBool.new(true_values, false_values, ignore_case)
  end

  # Filter created with value as the replacement for missing cells.
  def convert_nil_to(value)
    RBConvertNilTo.new(value)
  end

  # Filter that skips the rest of the chain for missing values.
  def optional
    RBOptional.new
  end

  # Character parser.
  def char
    RBParseChar.new
  end

  # Filter that collects every value it sees.
  def collector
    RBCollector.new
  end

  # Filter that converts values to IPAddr.
  def ipaddr
    RBIPAddr.new
  end

  # Filter driven by the given block; args are forwarded to the RBDynamic constructor.
  def dynamic(*args, &block)
    RBDynamic.new(*args, block: block)
  end

  # Substitution filter; args (and the optional block) are forwarded to RBGsub.
  def gsub(*args, &block)
    RBGsub.new(*args, block: block)
  end

  # Filter that sends the method named function, with args and block, to each value.
  def str(function, *args, &block)
    RBStringGeneric.new(function, *args, block: block)
  end

end
319
+
320
+ end
321
+
322
+ require_relative 'date_filters'
323
+ require_relative 'numeric_filters'
324
+ require_relative 'constraints'
325
+
326
+
327
+ =begin
328
+ # class Java::OrgSupercsvCellprocessor::CellProcessorAdaptor
329
+ class Java::OrgSupercsvCellprocessor::CellProcessorAdaptor
330
+ field_reader :next
331
+ end
332
+ =end