mdarray-jcsv 0.6.3-java

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44) hide show
  1. checksums.yaml +7 -0
  2. data/LICENSE.txt +23 -0
  3. data/README.md +2 -0
  4. data/Rakefile +46 -0
  5. data/config.rb +104 -0
  6. data/lib/constraints.rb +205 -0
  7. data/lib/date_filters.rb +252 -0
  8. data/lib/dimensions.rb +276 -0
  9. data/lib/filters.rb +332 -0
  10. data/lib/jcsv.rb +107 -0
  11. data/lib/list_reader.rb +200 -0
  12. data/lib/locale.rb +192 -0
  13. data/lib/map_reader.rb +192 -0
  14. data/lib/mdarray-jcsv.rb +24 -0
  15. data/lib/mdarray_reader.rb +110 -0
  16. data/lib/numeric_filters.rb +225 -0
  17. data/lib/reader.rb +547 -0
  18. data/lib/supercsv_interface.rb +231 -0
  19. data/test/test_complete.rb +37 -0
  20. data/test/test_critbit.rb +442 -0
  21. data/test/test_customer_list.rb +436 -0
  22. data/test/test_customer_map.rb +209 -0
  23. data/test/test_customer_nhlist.rb +161 -0
  24. data/test/test_deep_map.rb +264 -0
  25. data/test/test_del.rb +73 -0
  26. data/test/test_dimensions.rb +231 -0
  27. data/test/test_example.rb +79 -0
  28. data/test/test_filters.rb +374 -0
  29. data/test/test_list_dimensions.rb +110 -0
  30. data/test/test_mdarray.rb +227 -0
  31. data/test/test_missing_data.rb +57 -0
  32. data/vendor/commons-beanutils-1.8.3.jar +0 -0
  33. data/vendor/commons-lang3-3.1.jar +0 -0
  34. data/vendor/dozer-5.4.0.jar +0 -0
  35. data/vendor/jcl-over-slf4j-1.6.6.jar +0 -0
  36. data/vendor/joda-time-2.7.jar +0 -0
  37. data/vendor/slf4j-api-1.7.5.jar +0 -0
  38. data/vendor/snakeyaml-1.14.jar +0 -0
  39. data/vendor/super-csv-2.4.0.jar +0 -0
  40. data/vendor/super-csv-dozer-2.4.0.jar +0 -0
  41. data/vendor/super-csv-java8-2.4.0.jar +0 -0
  42. data/vendor/super-csv-joda-2.4.0.jar +0 -0
  43. data/version.rb +2 -0
  44. metadata +196 -0
@@ -0,0 +1,276 @@
1
+ # -*- coding: utf-8 -*-
2
+
3
+ ##########################################################################################
4
+ # @author Rodrigo Botafogo
5
+ #
6
+ # Copyright © 2015 Rodrigo Botafogo. All Rights Reserved. Permission to use, copy, modify,
7
+ # and distribute this software and its documentation, without fee and without a signed
8
+ # licensing agreement, is hereby granted, provided that the above copyright notice, this
9
+ # paragraph and the following two paragraphs appear in all copies, modifications, and
10
+ # distributions.
11
+ #
12
+ # IN NO EVENT SHALL RODRIGO BOTAFOGO BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL,
13
+ # INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF
14
+ # THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF RODRIGO BOTAFOGO HAS BEEN ADVISED OF THE
15
+ # POSSIBILITY OF SUCH DAMAGE.
16
+ #
17
+ # RODRIGO BOTAFOGO SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
18
+ # THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE
19
+ # SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED HEREUNDER IS PROVIDED "AS IS".
20
+ # RODRIGO BOTAFOGO HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS,
21
+ # OR MODIFICATIONS.
22
+ ##########################################################################################
23
+
24
+ class Jcsv
25
+
26
+ private
27
+
28
+ ##########################################################################################
29
+ # Class Dimension keeps track of all data dimensions in a CSV file. A data dimension is
30
+ # similar to a mathematical dimension such as x, y or z. In principle, every data should
31
+ # be associated with only one set of data dimensions. For example, let's say that our
32
+ # data has an employee ID, then column ID defines a dimension on the data, since every
33
+ # employee has only one ID and every ID is associated with only one employee. As another
34
+ # example, let's say that we have data about a medical experiment that was done with a
35
+ # set of patients for 4 weeks, who were given either a medicine or a placebo. The
36
+ # data could have columns labeled: "Patient Index", "Week", "Type of Medicine", "Blood
37
+ # Sample". Some entries would be:
38
+ #
39
+ # "Patient Index" "Week" "Type of Medicine" "Blood Sample"
40
+ # 1 1 Placebo xxxx
41
+ # 1 2 Placebo xxxx
42
+ # 2 1 med1 xxxx
43
+ # 2 2 med1 xxxx
44
+ #
45
+ # "Patient Index", "Week", "Type of Medicine" are three dimensions of this data and taken
46
+ # together unequivocally define the data, i.e., those dimensions are similar to a DB key.
47
+ # Since this is a key, there should be no other line of data with the same values in
48
+ # the dimensions.
49
+ #
50
+ # CSV files are not ideal for maintaining dimensions, so, in order to read dimensions
51
+ # in a CSV file, there is the need for some rules.
52
+ ##########################################################################################
53
+
54
# Tracks the distinct labels seen for one data dimension and the order in which they
# are expected to repeat.
#
# Labels are numbered from 0 in the order they are first added. When the first label
# is seen again after later ones, the dimension is frozen (see #reset): from then on
# no new label is accepted and labels are expected in the same order as before.
class Dimension

  # Sentinel used to tell `index` (no argument) apart from `index(nil)`.
  NO_LABEL = Object.new.freeze
  private_constant :NO_LABEL

  attr_reader :name
  attr_reader :frozen
  attr_reader :current_value
  attr_reader :next_value
  attr_reader :labels
  # Column index of this dimension in the csv file. Only the writer is generated
  # here; the reader is the zero-argument form of #index below. (Previously an
  # attr_accessor reader was silently replaced by `index(label)`, so the stored
  # column index could never be read back.)
  attr_writer :index

  #------------------------------------------------------------------------------------
  # dim_name is the dimension name.
  #------------------------------------------------------------------------------------

  def initialize(dim_name)
    @name = dim_name
    @frozen = false
    @current_value = nil # index of the label read last; nil until a label is added
    @next_value = 0      # index that the next new/expected label will get
    @max_value = 0       # highest label index; set when the dimension is frozen
    @labels = Hash.new   # label => index
  end

  #------------------------------------------------------------------------------------
  # Number of labels recorded for this dimension.
  #------------------------------------------------------------------------------------

  def size
    @labels.size
  end

  alias :length :size

  #------------------------------------------------------------------------------------
  # Registers that +label+ was read for this dimension.
  #
  # * An unknown label is stored with the next free index (indices start at 0 and
  #   grow by one). If the dimension is frozen the label is rejected with a
  #   RuntimeError and nothing is stored.
  # * A known label is accepted when it equals the label read last or is the next
  #   label in sequence.
  # * A known label with index 0 read after a later label means the sequence is
  #   starting over: the dimension is frozen (see #reset) and true is returned.
  # * Any other known label is out of order. The internal position is moved past the
  #   label that was read and a RuntimeError ("Missing data: ...") is raised; the
  #   caller may rescue it and keep reading.
  #
  # Returns true only when the sequence started over, false otherwise.
  #------------------------------------------------------------------------------------

  def add_label(label)

    unless @labels.has_key?(label)
      # Reject before touching any state: a refused label must not be recorded, or it
      # would get an index that may already belong to another label and would change
      # the size of a dimension that is supposed to be fixed.
      raise "Dimension '#{@name}' is frozen when adding label '#{label}'." if @frozen
      @current_value = @labels[label] = @next_value
      @next_value += 1
      return false
    end

    position = @labels[label]

    if position == @current_value
      # Just read one more line with the same label. No problem, keep reading
    elsif position == @next_value
      # Reading next label; after the last label the expected index wraps back to 0
      @current_value = @next_value
      @next_value = (@next_value + 1) % (@max_value + 1)
    elsif position < @current_value && position == 0
      # reached the last label and going back to the first one
      reset
      return true
    else
      # Label read is out of order. Expected value is either 0 (starting over) or
      # the next value. Although we raise an exception, we allow the calling method
      # to catch the exception and let the program still run.
      expected_value = (position < @current_value) ? 0 : @next_value
      reset if position < @current_value
      @current_value = position + 1
      @next_value = @current_value + 1
      raise "Missing data: next expected label was '#{@labels.key(expected_value)}' but read '#{label}'."
    end

    false

  end

  #------------------------------------------------------------------------------------
  # Freezes the dimension the first time it is called: remembers the index of the
  # label read last as the maximum index and restarts the position at the first
  # label. Does nothing when the dimension is already frozen.
  #------------------------------------------------------------------------------------

  def reset
    if !@frozen
      @frozen = true
      @max_value = @current_value
      @current_value = 0
      @next_value = 1
    end
  end

  #------------------------------------------------------------------------------------
  # With a label: returns the index recorded for that label (nil if unknown).
  # Without arguments: returns the column index assigned through #index=.
  #------------------------------------------------------------------------------------

  def index(label = NO_LABEL)
    return @index if NO_LABEL.equal?(label)
    @labels[label]
  end

  #------------------------------------------------------------------------------------
  # Shortcut for index(label).
  #------------------------------------------------------------------------------------

  def [](label)
    index(label)
  end

end
163
+
164
+ ##########################################################################################
165
+ #
166
+ ##########################################################################################
167
+
168
# Ordered collection of Dimension objects, one per dimension name, addressed by name.
class Dimensions

  attr_reader :dimensions_names
  attr_reader :dimensions
  attr_reader :rank

  #------------------------------------------------------------------------------------
  # dimensions_names is an array of column names that will be used as dimensions.
  # One Dimension is created per name; rank is the number of names.
  #------------------------------------------------------------------------------------

  def initialize(dimensions_names)

    @dimensions_names = dimensions_names
    @rank = @dimensions_names.size
    @dimensions = Hash.new

    @dimensions_names.each do |dim_name|
      @dimensions[dim_name] = Dimension.new(dim_name)
    end

  end

  #------------------------------------------------------------------------------------
  # Number of labels recorded for the dimension called dim_name.
  #------------------------------------------------------------------------------------

  def length(dim_name)
    @dimensions[dim_name].labels.size
  end

  alias :size :length

  #------------------------------------------------------------------------------------
  # The label => index hash of the dimension called dim_name.
  #------------------------------------------------------------------------------------

  def labels(dim_name)
    @dimensions[dim_name].labels
  end

  #------------------------------------------------------------------------------------
  # Array with the number of labels of every dimension, in dimensions_names order.
  #------------------------------------------------------------------------------------

  def shape
    @dimensions_names.map { |dim_name| length(dim_name) }
  end

  #------------------------------------------------------------------------------------
  # Adds label to the dimension called dim_name. When that dimension reports that its
  # labels started over (Dimension#add_label returned a true value), reset is called
  # on it and on every dimension listed after it in dimensions_names.
  # Exceptions raised by Dimension#add_label are not rescued here.
  #------------------------------------------------------------------------------------

  def add_label(dim_name, label)

    return unless @dimensions[dim_name].add_label(label)

    # The range starts at dim_name itself, so no separate reset of it is needed.
    first = @dimensions_names.index(dim_name)
    (first...@dimensions_names.size).each do |i|
      @dimensions[@dimensions_names[i]].reset
    end

  end

  #------------------------------------------------------------------------------------
  # dims[dim_name] = label is the same as add_label(dim_name, label).
  #------------------------------------------------------------------------------------

  def []=(dim_name, label)
    add_label(dim_name, label)
  end

  #------------------------------------------------------------------------------------
  # The Dimension object registered under name (nil if there is none).
  #------------------------------------------------------------------------------------

  def dimension(name)
    @dimensions[name]
  end

  #------------------------------------------------------------------------------------
  # Same lookup as #dimension.
  #------------------------------------------------------------------------------------

  def [](name)
    @dimensions[name]
  end

  #------------------------------------------------------------------------------------
  # Yields every Dimension in dimensions_names order. Without a block an Enumerator
  # is returned instead of raising LocalJumpError.
  #------------------------------------------------------------------------------------

  def each

    return enum_for(:each) unless block_given?

    @dimensions_names.each do |name|
      yield @dimensions[name]
    end

  end

end
275
+
276
+ end
@@ -0,0 +1,332 @@
1
+ # -*- coding: utf-8 -*-
2
+
3
+ ##########################################################################################
4
+ # @author Rodrigo Botafogo
5
+ #
6
+ # Copyright © 2015 Rodrigo Botafogo. All Rights Reserved. Permission to use, copy, modify,
7
+ # and distribute this software and its documentation, without fee and without a signed
8
+ # licensing agreement, is hereby granted, provided that the above copyright notice, this
9
+ # paragraph and the following two paragraphs appear in all copies, modifications, and
10
+ # distributions.
11
+ #
12
+ # IN NO EVENT SHALL RODRIGO BOTAFOGO BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL,
13
+ # INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF
14
+ # THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF RODRIGO BOTAFOGO HAS BEEN ADVISED OF THE
15
+ # POSSIBILITY OF SUCH DAMAGE.
16
+ #
17
+ # RODRIGO BOTAFOGO SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
18
+ # THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE
19
+ # SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED HEREUNDER IS PROVIDED "AS IS".
20
+ # RODRIGO BOTAFOGO HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS,
21
+ # OR MODIFICATIONS.
22
+ ##########################################################################################
23
+
24
+ require 'bigdecimal'
25
+ require 'ipaddr'
26
+
27
+ require_relative 'locale'
28
+
29
+ class Jcsv
30
+
31
+ #========================================================================================
32
+ #
33
+ #========================================================================================
34
+
35
# Base class of the exception classes declared below.
class Error < RuntimeError; end

# Raised by filters in this file when the wrapped SuperCSV processor reports a
# SuperCsvCellProcessorException (see RBParseBool and RBParseChar).
class FilterError < Jcsv::Error; end

# Not raised in this file; presumably used by the constraints code -- TODO confirm.
class ConstraintViolation < Jcsv::Error; end

# Not raised in this file; presumably signals a CSV without the required headers --
# TODO confirm against the reader code.
class MissingHeadersError < Jcsv::Error; end

# Not raised in this file; presumably signals a repeated key -- TODO confirm.
class DuplicateKeyError < Jcsv::Error; end
54
+
55
+ #========================================================================================
56
+ #
57
+ #========================================================================================
58
+
59
# Mixin that lets filters be chained with >> into a singly linked list and lets each
# filter hand its result to the one after it.
module NextFilter

  # This object's next filter (nil when it is the end of the chain)
  attr_accessor :next_filter

  # last_filter points to the last filter in the chain that starts at this object.
  # It is necessary to append to the linked list of filters. It is only kept up to
  # date on the object that >> is called on.
  attr_accessor :last_filter

  #---------------------------------------------------------------------------------------
  # Appends next_filter (and whatever is already chained after it) to the end of the
  # chain that starts at this filter. Returns self, so calls can be written as
  # a >> b >> c.
  #
  # next_filter may itself be the head of a chain, as in a >> (b >> c) >> d or
  # a >> b >> (c >> d) >> e. In both positions the new end of the chain is the end of
  # the appended chain, not its head; otherwise a later >> would overwrite the link
  # after the head and drop the rest of the appended chain.
  #---------------------------------------------------------------------------------------

  def >>(next_filter)
    # End of what is being appended: its own tail if it has one, else the filter itself
    tail = next_filter.last_filter || next_filter

    if @next_filter.nil?
      @next_filter = next_filter
    else
      @last_filter.next_filter = next_filter
    end

    @last_filter = tail
    self
  end

  #---------------------------------------------------------------------------------------
  # Executes the next filter with (value, context) and returns its result; returns
  # value unchanged when there is no next filter.
  #---------------------------------------------------------------------------------------

  def exec_next(value, context)
    @next_filter ? @next_filter.execute(value, context) : value
  end

end
97
+
98
+ #========================================================================================
99
+ #
100
+ #========================================================================================
101
+
102
# Common superclass of the Ruby-implemented filters in this file. It extends
# SuperCSV's CellProcessorAdaptor, declares the Long, Double and String cell
# processor interfaces, and mixes in NextFilter so instances can be chained with >>.
# Subclasses supply execute(value, context).
class Filter < org.supercsv.cellprocessor.CellProcessorAdaptor

  # SuperCSV marker interfaces
  include org.supercsv.cellprocessor.ift.LongCellProcessor
  include org.supercsv.cellprocessor.ift.DoubleCellProcessor
  include org.supercsv.cellprocessor.ift.StringCellProcessor

  # Ruby-side chaining (>> and exec_next)
  include NextFilter

end
109
+
110
+ #========================================================================================
111
+ #
112
+ #========================================================================================
113
+
114
# Boolean-parsing filter built on SuperCSV's ParseBool, chainable through NextFilter.
class RBParseBool < org.supercsv.cellprocessor.ParseBool

  include NextFilter

  # true_values and false_values are converted with to_java(:string) before being
  # handed, together with ignore_case, to the ParseBool constructor.
  def initialize(true_values, false_values, ignore_case)
    super(true_values.to_java(:string), false_values.to_java(:string), ignore_case)
  end

  # Lets ParseBool process the value and passes the result to the next filter.
  # A SuperCsvCellProcessorException raised while doing so is re-raised as a
  # FilterError whose message also carries the context.
  def execute(value, context)
    parsed = super(value, context)
    exec_next(parsed, context)
  rescue org.supercsv.exception.SuperCsvCellProcessorException => e
    raise FilterError.new("#{e.message} in:\n #{context}")
  end

end
133
+
134
+ #========================================================================================
135
+ #
136
+ #========================================================================================
137
+
138
# Filter that substitutes a fixed replacement for nil values and then continues with
# the next filter.
class RBConvertNilTo < Filter

  # The replacement used for nil input
  attr_reader :value

  # value: what a nil input is replaced with.
  def initialize(value)
    @value = value
    super()
  end

  # Passes @value to the next filter when value is nil, otherwise value itself.
  # Only nil is replaced: a false value (for instance one produced by an earlier
  # boolean filter) is real data and is passed on unchanged.
  def execute(value, context)
    val = value.nil? ? @value : value
    exec_next(val, context)
  end

end
152
+
153
+ #========================================================================================
154
+ #
155
+ #========================================================================================
156
+
157
# Filter that makes the rest of the chain optional: a nil value is returned as is
# without running the following filters.
class RBOptional < Filter

  # Returns nil untouched when value is nil; any other value -- including false,
  # which is data and not a missing value -- is handed to the next filter.
  def execute(value, context)
    value.nil? ? value : exec_next(value, context)
  end

end
164
+
165
+ #========================================================================================
166
+ #
167
+ #========================================================================================
168
+
169
# Character-parsing filter built on SuperCSV's ParseChar, chainable through
# NextFilter.
class RBParseChar < org.supercsv.cellprocessor.ParseChar

  include NextFilter

  # Calls the no-argument ParseChar constructor.
  def initialize
    super()
  end

  # Lets ParseChar process the value and passes the result to the next filter.
  # A SuperCsvCellProcessorException raised while doing so is re-raised as a
  # FilterError whose message also carries the context.
  def execute(value, context)
    parsed = super(value, context)
    exec_next(parsed, context)
  rescue org.supercsv.exception.SuperCsvCellProcessorException => e
    raise FilterError.new("#{e.message} in:\n #{context}")
  end

end
185
+
186
+ #========================================================================================
187
+ #
188
+ #========================================================================================
189
+
190
# Filter that remembers every value it sees, in order, and passes each one on
# unchanged.
class RBCollector < Filter

  # Array of the values seen so far
  attr_reader :collection

  def initialize
    @collection = Array.new
    super()
  end

  # Validates the value with validateInputNotNull (not defined in this file;
  # presumably inherited from the SuperCSV adaptor and rejecting nil -- TODO
  # confirm), appends it to the collection and continues with the next filter.
  def execute(value, context)
    validateInputNotNull(value, context)
    @collection.push(value)
    exec_next(value, context)
  end

end
206
+
207
+ #========================================================================================
208
+ #
209
+ #========================================================================================
210
+
211
# Filter that turns the cell value into an IPAddr object.
class RBIPAddr < Filter

  # Validates the value with validateInputNotNull (not defined in this file;
  # presumably inherited from the SuperCSV adaptor -- TODO confirm), builds an IPAddr
  # from it and passes that object to the next filter. Errors raised by IPAddr.new
  # are not rescued here.
  def execute(value, context)
    validateInputNotNull(value, context)
    address = IPAddr.new(value)
    exec_next(address, context)
  end

end
220
+
221
+ #========================================================================================
222
+ #
223
+ #========================================================================================
224
+
225
# Filter whose transformation is supplied by the user as a callable.
class RBDynamic < Filter

  # block: object responding to call(value, context); may be nil.
  def initialize(block: nil)
    @block = block
    super()
  end

  # Replaces the value with the result of the callable (when one was given) and
  # continues with the next filter; without a callable the value passes through.
  def execute(value, context)
    result = @block ? @block.call(value, context) : value
    exec_next(result, context)
  end

end
238
+
239
+ #========================================================================================
240
+ #
241
+ #========================================================================================
242
+
243
# Substitution filter: applies gsub, or a user supplied callable, to the value.
class RBGsub < Filter

  # args:  arguments stored for the substitution.
  # block: optional callable used instead of gsub.
  def initialize(*args, block: nil)
    @args = args
    @block = block
    super()
  end

  # With a callable: the new value is block.call(value, *args). Without one: the new
  # value is value.gsub(*args). The new value then goes to the next filter.
  def execute(value, context)
    replaced =
      if @block
        @block.call(value, *@args)
      else
        value.gsub(*@args)
      end
    exec_next(replaced, context)
  end

end
257
+
258
+ #========================================================================================
259
+ #
260
+ #========================================================================================
261
+
262
# Generic filter that calls an arbitrary method on the value.
class RBStringGeneric < Filter

  # function: name of the method to call on each value.
  # args:     arguments passed to that method.
  # block:    optional block passed to that method.
  def initialize(function, *args, block: nil)
    @function = function
    @args = args
    @block = block
    super()
  end

  # Sends the stored method, with the stored arguments and block, to the value and
  # passes the outcome to the next filter.
  def execute(value, context)
    outcome = value.send(@function, *@args, &@block)
    exec_next(outcome, context)
  end

end
277
+
278
+ #========================================================================================
279
+ #
280
+ #========================================================================================
281
+
282
# Factory methods: each one builds and returns a new instance of the corresponding
# filter class defined above.
class << self

  # Boolean parser. true_values / false_values list the accepted spellings;
  # ignore_case is handed to RBParseBool unchanged.
  def bool(true_values: ["true", "1", "y", "t"],
           false_values: ["false", "n", "0", "f"],
           ignore_case: true)
    RBParseBool.new(true_values, false_values, ignore_case)
  end

  # Replaces missing values with value.
  def convert_nil_to(value)
    RBConvertNilTo.new(value)
  end

  # Skips the rest of the chain for missing values.
  def optional
    RBOptional.new
  end

  # Character parser.
  def char
    RBParseChar.new
  end

  # Collects every value that goes through it.
  def collector
    RBCollector.new
  end

  # Converts values to IPAddr objects.
  def ipaddr
    RBIPAddr.new
  end

  # Filter driven by the given block. Positional args are forwarded to
  # RBDynamic.new as they are.
  def dynamic(*args, &block)
    RBDynamic.new(*args, block: block)
  end

  # gsub based filter; args and block are forwarded to RBGsub.new.
  def gsub(*args, &block)
    RBGsub.new(*args, block: block)
  end

  # Filter calling the method named function on each value; args and block are
  # forwarded to RBStringGeneric.new.
  def str(function, *args, &block)
    RBStringGeneric.new(function, *args, block: block)
  end

end
319
+
320
+ end
321
+
322
+ require_relative 'date_filters'
323
+ require_relative 'numeric_filters'
324
+ require_relative 'constraints'
325
+
326
+
327
+ =begin
328
+ # class Java::OrgSupercsvCellprocessor::CellProcessorAdaptor
329
+ class Java::OrgSupercsvCellprocessor::CellProcessorAdaptor
330
+ field_reader :next
331
+ end
332
+ =end