mdarray-jcsv 0.6.3-java

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44)
  1. checksums.yaml +7 -0
  2. data/LICENSE.txt +23 -0
  3. data/README.md +2 -0
  4. data/Rakefile +46 -0
  5. data/config.rb +104 -0
  6. data/lib/constraints.rb +205 -0
  7. data/lib/date_filters.rb +252 -0
  8. data/lib/dimensions.rb +276 -0
  9. data/lib/filters.rb +332 -0
  10. data/lib/jcsv.rb +107 -0
  11. data/lib/list_reader.rb +200 -0
  12. data/lib/locale.rb +192 -0
  13. data/lib/map_reader.rb +192 -0
  14. data/lib/mdarray-jcsv.rb +24 -0
  15. data/lib/mdarray_reader.rb +110 -0
  16. data/lib/numeric_filters.rb +225 -0
  17. data/lib/reader.rb +547 -0
  18. data/lib/supercsv_interface.rb +231 -0
  19. data/test/test_complete.rb +37 -0
  20. data/test/test_critbit.rb +442 -0
  21. data/test/test_customer_list.rb +436 -0
  22. data/test/test_customer_map.rb +209 -0
  23. data/test/test_customer_nhlist.rb +161 -0
  24. data/test/test_deep_map.rb +264 -0
  25. data/test/test_del.rb +73 -0
  26. data/test/test_dimensions.rb +231 -0
  27. data/test/test_example.rb +79 -0
  28. data/test/test_filters.rb +374 -0
  29. data/test/test_list_dimensions.rb +110 -0
  30. data/test/test_mdarray.rb +227 -0
  31. data/test/test_missing_data.rb +57 -0
  32. data/vendor/commons-beanutils-1.8.3.jar +0 -0
  33. data/vendor/commons-lang3-3.1.jar +0 -0
  34. data/vendor/dozer-5.4.0.jar +0 -0
  35. data/vendor/jcl-over-slf4j-1.6.6.jar +0 -0
  36. data/vendor/joda-time-2.7.jar +0 -0
  37. data/vendor/slf4j-api-1.7.5.jar +0 -0
  38. data/vendor/snakeyaml-1.14.jar +0 -0
  39. data/vendor/super-csv-2.4.0.jar +0 -0
  40. data/vendor/super-csv-dozer-2.4.0.jar +0 -0
  41. data/vendor/super-csv-java8-2.4.0.jar +0 -0
  42. data/vendor/super-csv-joda-2.4.0.jar +0 -0
  43. data/version.rb +2 -0
  44. metadata +196 -0
data/lib/map_reader.rb
@@ -0,0 +1,192 @@
+ # -*- coding: utf-8 -*-
+
+ ##########################################################################################
+ # author Rodrigo Botafogo
+ #
+ # Copyright © 2015 Rodrigo Botafogo. All Rights Reserved. Permission to use, copy, modify,
+ # and distribute this software and its documentation, without fee and without a signed
+ # licensing agreement, is hereby granted, provided that the above copyright notice, this
+ # paragraph and the following two paragraphs appear in all copies, modifications, and
+ # distributions.
+ #
+ # IN NO EVENT SHALL RODRIGO BOTAFOGO BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL,
+ # INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF
+ # THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF RODRIGO BOTAFOGO HAS BEEN ADVISED OF THE
+ # POSSIBILITY OF SUCH DAMAGE.
+ #
+ # RODRIGO BOTAFOGO SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ # THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE
+ # SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED HEREUNDER IS PROVIDED "AS IS".
+ # RODRIGO BOTAFOGO HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS,
+ # OR MODIFICATIONS.
+ ##########################################################################################
+
+ require 'critbit'
+
+ class Jcsv
+
+ #========================================================================================
+ #
+ #========================================================================================
+
+ class MapReader < Reader
+ include_package "java.io"
+
+ #---------------------------------------------------------------------------------------
+ #
+ #---------------------------------------------------------------------------------------
+
+ def initialize(*params)
+ super(*params)
+ @column_mapping.mapping = @headers if !@dimensions
+ @map_klass = (@format == :map)? Hash : Critbit
+ end
+
+ #---------------------------------------------------------------------------------------
+ # Maps columns to the given names. In map reader, there is no column reordering, as
+ # this does not really make any sense, since one gets to the data through the key and
+ # not through its position in the array. If there are dimensions set, then every
+ # dimension will map to true, in order for it to be properly processed by the parsing
+ # method. Other fields can still be mapped to false, so that they are not read if
+ # desired.
+ #---------------------------------------------------------------------------------------
+
+ def mapping=(column_mapping)
+
+ @column_mapping.mapping ||= Array.new
+
+ @headers.each_with_index do |h, i|
+ next if @dimensions && !@dimensions[h].nil?
+ name = column_mapping[h]
+ raise ArgumentError.new("'true' is not allowed as a mapping: #{column_mapping}") if
+ name == true
+ @column_mapping.mapping[i] = (name.nil?)? h : name
+ end
+
+ end
+
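A minimal usage sketch of the mapping above (not part of the package diff): the Jcsv.reader entry point, the CSV file name and its header names are assumptions; only the mapping= semantics come from this hunk. Mapped names replace the header key, false drops a column, and mapping a field to true raises ArgumentError:

    reader = Jcsv.reader("customers.csv", format: :map)    # entry point assumed
    reader.mapping = { "customer_no"     => :id,            # rename this column's key
                       "mailing_address" => false }          # skip this column entirely
    rows = reader.read                                       # un-mapped headers keep their names
    # reader.mapping = { "name" => true }                    # would raise ArgumentError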
+ #---------------------------------------------------------------------------------------
+ # read the file.
+ #---------------------------------------------------------------------------------------
+
+ def read(&block)
+
+ # When no block given, chunks read are stored in an array and returned to the user.
+ if (!block_given?)
+ # if dimensions and chunk_size is 0, then do not wrap each row in an array, we
+ # can access the data directly by using the dimension key
+ if (@dimensions && @chunk_size == 0)
+ rows = @map_klass.new
+ parse_with_block do |line_no, row_no, chunk|
+ rows.merge!(chunk)
+ end
+ else
+ # chunk_size > 0, then each chunk should be a hash, and all chunks should
+ # be wrapped inside an array
+ rows = []
+ parse_with_block do |line_no, row_no, chunk|
+ rows << chunk
+ end
+ end
+ rows
+ else # block given
+ parse_with_block(&block)
+ end
+ end
+
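A sketch of the two ways of calling read (illustration only; the Jcsv.reader entry point and the chunk_size option name are assumed from reader.rb, which is not shown here). The block parameters line_no, row_no and chunk come from parse_with_block above:

    reader = Jcsv.reader("sales.csv", format: :map, chunk_size: 10)   # assumed signature
    chunks = reader.read    # no block: Array of hashes, one hash per chunk of up to 10 rows;
                            # a single Hash/Critbit when dimensions are set and chunk_size is 0

    # With a block, each chunk is yielded as it is parsed and nothing is accumulated:
    Jcsv.reader("sales.csv", format: :map, chunk_size: 10).read do |line_no, row_no, chunk|
      puts "rows up to #{row_no}: #{chunk}"
    end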
+ #---------------------------------------------------------------------------------------
+ #
+ #---------------------------------------------------------------------------------------
+
+ private
+
+ #---------------------------------------------------------------------------------------
+ #
+ #---------------------------------------------------------------------------------------
+
+ def new_reader(preferences)
+
+ begin
+ raise MissingHeadersError.new("Reading file as map requires headers.") if
+ (!@headers && !@custom_headers)
+ @reader = CMR.new(FileReader.new(@filename), preferences, @dimensions,
+ @suppress_warnings)
+ rescue java.io.IOException => e
+ raise IOError.new(e.message)
+ end
+
+ end
+
+ #---------------------------------------------------------------------------------------
+ #
+ #---------------------------------------------------------------------------------------
+
+ def format(chunk)
+ chunk
+ end
+
+ #---------------------------------------------------------------------------------------
+ # Maps columns to the given names. In map reader, there is no column reordering, as
+ # this does not really make any sense, since one gets to the data through the key and
+ # not through its position in the array. If there are dimensions set, then every
+ # dimension will map to true, in order for it to be properly processed by the parsing
+ # method. Other fields can still be mapped to false, so that they are not read if
+ # desired.
+ #---------------------------------------------------------------------------------------
+
+ def assign_mapping(column_mapping)
+
+ @column_mapping.mapping ||= Array.new
+
+ @headers.each_with_index do |h, i|
+ name = column_mapping[h]
+ @column_mapping.mapping[i] = (name.nil?)? h : name
+ end
+
+ end
+
+ #---------------------------------------------------------------------------------------
+ # A chunk is either one row of the file, or an array with rows. One row can be either
+ # a one dimensional array with all columns or a hash with all columns (excluding the
+ # dimensions).
+ #---------------------------------------------------------------------------------------
+
+ def read_chunk
+
+ if (@dimensions)
+ if (@chunk_size == 0)
+ row = @reader.read(@column_mapping, @filters)
+ return (row.nil?)? nil : { row.delete(:key).join(".") => row }
+ end
+
+ rows = {}
+ (1..@chunk_size).each do |i|
+ if ((row = @reader.read(@column_mapping, @filters)).nil?)
+ return (rows.size == 0)? nil : rows
+ else
+ if (@deep_map)
+ key = row.delete(:key)
+ key.reduce(rows) { |h,m| h[m] ||= {} }
+ last = key.pop
+ if (key.inject(rows, :fetch)[last] != {})
+ # p "overriding value for key: #{chunk[:key]} with #{chunk}"
+ raise DuplicateKeyError.new("Key #{row[:key]} not unique for this dataset. #{row}")
+ end
+ key.inject(rows, :fetch)[last] = row
+ else # not a deep map
+ key = row.delete(:key).join(".")
+ raise DuplicateKeyError.new("Key #{key} not unique for this dataset. #{row}") if
+ rows.has_key?(key)
+ rows.merge!({key => row})
+ end
+ end
+ end
+ return rows
+ else # no dimensions
+ super
+ end
+ end
+
+ end
+
+ end
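A worked sketch of the deep_map branch above (illustration only; the dimension values are hypothetical). key.reduce builds one level of nesting per dimension value, and key.inject(rows, :fetch) walks back down to store the row; without deep_map the same values are simply joined with "." into a flat key:

    rows = {}
    key  = ["2015", "01"]                         # dimension values for one row
    key.reduce(rows) { |h, m| h[m] ||= {} }       # rows => { "2015" => { "01" => {} } }
    last = key.pop                                # last => "01", key => ["2015"]
    key.inject(rows, :fetch)[last] = { "sales" => 10 }
    # deep map:      rows => { "2015" => { "01" => { "sales" => 10 } } }
    # flat key map:  rows => { "2015.01" => { "sales" => 10 } }
    # a key seen twice raises DuplicateKeyError in either case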
data/lib/mdarray-jcsv.rb
@@ -0,0 +1,24 @@
+ # -*- coding: utf-8 -*-
+
+ ##########################################################################################
+ # @author Rodrigo Botafogo
+ #
+ # Copyright © 2015 Rodrigo Botafogo. All Rights Reserved. Permission to use, copy, modify,
+ # and distribute this software and its documentation, without fee and without a signed
+ # licensing agreement, is hereby granted, provided that the above copyright notice, this
+ # paragraph and the following two paragraphs appear in all copies, modifications, and
+ # distributions.
+ #
+ # IN NO EVENT SHALL RODRIGO BOTAFOGO BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL,
+ # INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF
+ # THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF RODRIGO BOTAFOGO HAS BEEN ADVISED OF THE
+ # POSSIBILITY OF SUCH DAMAGE.
+ #
+ # RODRIGO BOTAFOGO SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ # THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE
+ # SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED HEREUNDER IS PROVIDED "AS IS".
+ # RODRIGO BOTAFOGO HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS,
+ # OR MODIFICATIONS.
+ ##########################################################################################
+
+ require_relative 'jcsv'
data/lib/mdarray_reader.rb
@@ -0,0 +1,110 @@
+ # -*- coding: utf-8 -*-
+
+ ##########################################################################################
+ # @author Rodrigo Botafogo
+ #
+ # Copyright © 2015 Rodrigo Botafogo. All Rights Reserved. Permission to use, copy, modify,
+ # and distribute this software and its documentation, without fee and without a signed
+ # licensing agreement, is hereby granted, provided that the above copyright notice, this
+ # paragraph and the following two paragraphs appear in all copies, modifications, and
+ # distributions.
+ #
+ # IN NO EVENT SHALL RODRIGO BOTAFOGO BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL,
+ # INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF
+ # THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF RODRIGO BOTAFOGO HAS BEEN ADVISED OF THE
+ # POSSIBILITY OF SUCH DAMAGE.
+ #
+ # RODRIGO BOTAFOGO SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ # THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE
+ # SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED HEREUNDER IS PROVIDED "AS IS".
+ # RODRIGO BOTAFOGO HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS,
+ # OR MODIFICATIONS.
+ ##########################################################################################
+
+ require 'mdarray'
+
+ class Jcsv
+
+ #========================================================================================
+ #
+ #========================================================================================
+
+ class MDArrayReader < MapReader
+ include_package "java.io"
+
+ #---------------------------------------------------------------------------------------
+ #
+ #---------------------------------------------------------------------------------------
+
+ def initialize(*params)
+
+ filter = nil
+
+ @dtype = params[1].delete(:dtype)
+
+ case @dtype
+ when :byte, :short, :int
+ filter = Jcsv.int
+ when :long
+ filter = Jcsv.long
+ when :float, :double
+ filter = Jcsv.double
+ else
+ raise "Cannot create MDArray of dtype '#{@dtype}'"
+ end
+
+ params[1][:default_filter] = filter
+ super(*params)
+
+ end
+
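For illustration, how the dtype option above shapes the reader configuration (a sketch; the :mdarray format key and the way the options hash reaches this constructor are assumptions, only the :dtype handling and :default_filter assignment come from this hunk):

    opts = { format: :mdarray, dtype: :double }   # hypothetical caller options (params[1])
    # after initialize: opts == { format: :mdarray, default_filter: Jcsv.double }
    #   dtype: :byte, :short, :int  -> Jcsv.int
    #   dtype: :long                -> Jcsv.long
    #   dtype: :float, :double      -> Jcsv.double
    #   any other dtype raises "Cannot create MDArray of dtype '...'"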
+ #---------------------------------------------------------------------------------------
+ #
+ #---------------------------------------------------------------------------------------
+
+ def read
+ to_mdarray(@dtype, super)
+ end
+
+ #---------------------------------------------------------------------------------------
+ # Converts the data to an MDArray
+ #---------------------------------------------------------------------------------------
+
+ def to_mdarray(dtype, storage)
+
+ raise "Cannot convert deep map into MDArray" if (@deep_map == true)
+
+ prod = nil
+ shape = []
+ vector = []
+
+ columns = @column_mapping.mapping - [true, false, nil]
+ header_size = columns.size
+
+ if (@dimensions.nil?)
+ shape = [storage.size, header_size]
+ storage.each do |line|
+ vector.concat(line.values)
+ end
+ else
+ @dimensions.dimensions_names.each do |name|
+ keys = @dimensions[name].labels.keys
+ shape << keys.size
+ prod = (prod.nil?)? keys : prod.product(keys)
+ end
+
+ shape << header_size
+
+ prod.each do |k|
+ row = (@dimensions.dimensions_names.size > 1)? storage[k.flatten.join(".")] : storage[k]
+ vector.concat(((row.nil?)? ([Float::NAN] * header_size) : row.values))
+ end
+ end
+
+ array = MDArray.build(@dtype, shape, vector)
+
+ end
+
+ end
+
+ end
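A worked sizing sketch for to_mdarray (illustration only; the dimension names, labels and column count are hypothetical):

    # :year has labels ["2014", "2015"], :month has ["jan", "feb", "mar"], and
    # two data columns survive the mapping, so
    #   shape  == [2, 3, 2]
    #   vector == the two column values for every (year, month) pair, taken in
    #             label order, with [Float::NAN, Float::NAN] filling any pair
    #             that has no row in the file
    # MDArray.build(:double, [2, 3, 2], vector) then yields the 2x3x2 array.
    # Without dimensions the shape is simply [storage.size, header_size].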
data/lib/numeric_filters.rb
@@ -0,0 +1,225 @@
+ # -*- coding: utf-8 -*-
+
+ ##########################################################################################
+ # @author Rodrigo Botafogo
+ #
+ # Copyright © 2015 Rodrigo Botafogo. All Rights Reserved. Permission to use, copy, modify,
+ # and distribute this software and its documentation, without fee and without a signed
+ # licensing agreement, is hereby granted, provided that the above copyright notice, this
+ # paragraph and the following two paragraphs appear in all copies, modifications, and
+ # distributions.
+ #
+ # IN NO EVENT SHALL RODRIGO BOTAFOGO BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL,
+ # INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF
+ # THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF RODRIGO BOTAFOGO HAS BEEN ADVISED OF THE
+ # POSSIBILITY OF SUCH DAMAGE.
+ #
+ # RODRIGO BOTAFOGO SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ # THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE
+ # SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED HEREUNDER IS PROVIDED "AS IS".
+ # RODRIGO BOTAFOGO HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS,
+ # OR MODIFICATIONS.
+ ##########################################################################################
+
+ require 'bigdecimal'
+ require_relative 'locale'
+
+ class Jcsv
+ # include_package "org.supercsv.cellprocessor"
+ # include_package "org.supercsv.cellprocessor.constraint"
+
+ #========================================================================================
+ #
+ #========================================================================================
+
+ class RBParseInt < org.supercsv.cellprocessor.ParseInt
+ include NextFilter
+
+ def execute(value, context)
+ begin
+ exec_next(super(value, context), context)
+ rescue org.supercsv.exception.SuperCsvCellProcessorException => e
+ raise FilterError.new("#{e.message} in #{context}")
+ end
+ end
+
+ end
+
+ #========================================================================================
+ #
+ #========================================================================================
+
+ class RBParseLong < org.supercsv.cellprocessor.ParseLong
+ include NextFilter
+
+ def execute(value, context)
+ begin
+ exec_next(super(value, context), context)
+ rescue org.supercsv.exception.SuperCsvCellProcessorException => e
+ raise FilterError, "#{e.message} in #{context}"
+ end
+ end
+
+ end
+
+ #========================================================================================
+ #
+ #========================================================================================
+
+ class RBParseDouble < org.supercsv.cellprocessor.ParseDouble
+ include NextFilter
+
+ def execute(value, context)
+ begin
+ exec_next(super(value, context), context)
+ rescue org.supercsv.exception.SuperCsvCellProcessorException => e
+ raise FilterError, "#{e.message} in #{context}"
+ end
+ end
+
+ end
+
+ #========================================================================================
+ #
+ #========================================================================================
+
+ class RBParseFloat < Filter
+
+ attr_reader :locale
+ attr_reader :dfs
+
+ def initialize(locale)
+ @locale = locale
+ @dfs = DFSymbols.new(locale)
+ @grouping_separator = @dfs.grouping_separator
+ @decimal_separator = @dfs.decimal_separator
+ super()
+ end
+
+ def execute(value, context)
+ validateInputNotNull(value, context)
+ value = value.gsub(@grouping_separator.chr, "").
+ gsub(@decimal_separator.chr, ".").to_f
+ exec_next(value, context)
+ end
+
+ end
+
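A sketch of the locale-driven parsing in RBParseFloat (illustration only; building a German-style Locale is assumed to be handled by locale.rb, which is not part of this hunk):

    # With a locale whose grouping separator is "." and decimal separator is ","
    de_float = Jcsv.float(german_locale)    # german_locale is hypothetical
    # "1.234,56"  ->  grouping "." removed, "," replaced by "."  ->  1234.56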
+ #========================================================================================
+ #
+ #========================================================================================
+
+ class RBParseBignum < Filter
+
+ def execute(value, context)
+ validateInputNotNull(value, context)
+ exec_next(value.to_i, context)
+ end
+
+ end
+
+ #========================================================================================
+ #
+ #========================================================================================
+
+ class RBParseComplex < Filter
+
+ def execute(value, context)
+ validateInputNotNull(value, context)
+ exec_next(value.to_c, context)
+ end
+
+ end
+
+ #========================================================================================
+ #
+ #========================================================================================
+
+ class RBParseRational < Filter
+
+ def execute(value, context)
+ validateInputNotNull(value, context)
+ exec_next(value.to_r, context)
+ end
+
+ end
+
+ #========================================================================================
+ #
+ #========================================================================================
+
+ class RBParseBigDecimal < Filter
+
+ attr_reader :locale
+ attr_reader :dfs
+
+ def initialize(locale)
+ @locale = locale
+ @dfs = DFSymbols.new(locale)
+ @grouping_separator = @dfs.grouping_separator
+ @decimal_separator = @dfs.decimal_separator
+ super()
+ end
+
+ def execute(value, context)
+ validateInputNotNull(value, context)
+ # raise "BigDecimal expects a String as input not #{value}" if !(value.is_a? String)
+ bd = BigDecimal.new(value.gsub(@grouping_separator.chr, "").
+ gsub(@decimal_separator.chr, "."))
+ exec_next(bd, context)
+ end
+
+ end
+
+ #========================================================================================
+ #
+ #========================================================================================
+
+ def self.int
+ RBParseInt.new
+ end
+
+ def self.long
+ RBParseLong.new
+ end
+
+ def self.double
+ RBParseDouble.new
+ end
+
+ def self.fixnum
+ RBParseBignum.new
+ end
+
+ def self.float(locale = Locale.default)
+ RBParseFloat.new(locale)
+ end
+
+ def self.complex
+ RBParseComplex.new
+ end
+
+ def self.rational
+ RBParseRational.new
+ end
+
+ def self.bignum
+ RBParseBignum.new
+ end
+
+ #---------------------------------------------------------------------------------------
+ # Convert a String to a BigDecimal. It uses the String constructor of BigDecimal
+ # (new BigDecimal("0.1")) as it yields predictable results (see BigDecimal).
+ # If the data uses a character other than "." as a decimal separator (Germany uses ","
+ # for example), then use the constructor that accepts a DecimalFormatSymbols object, as
+ # it will convert the character to a "." before creating the BigDecimal. Likewise if the
+ # data contains a grouping separator (Germany uses "." for example) then supplying a
+ # DecimalFormatSymbols object will allow grouping separators to be removed before
+ # parsing.
+ #---------------------------------------------------------------------------------------
+
+ def self.big_decimal(locale = Locale.default)
+ Jcsv::RBParseBigDecimal.new(locale)
+ end
+
+ end
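A minimal sketch of how these factory filters might be attached to columns (illustration only; the Jcsv.reader entry point, the filters= assignment and the column names are assumptions, only the factory methods come from this hunk):

    reader = Jcsv.reader("invoices.csv", format: :map)
    reader.filters = { "quantity"   => Jcsv.int,
                       "unit_price" => Jcsv.big_decimal,   # BigDecimal, default locale
                       "total"      => Jcsv.double }
    rows = reader.read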