mdarray-jcsv 0.6.3-java

Files changed (44)
  1. checksums.yaml +7 -0
  2. data/LICENSE.txt +23 -0
  3. data/README.md +2 -0
  4. data/Rakefile +46 -0
  5. data/config.rb +104 -0
  6. data/lib/constraints.rb +205 -0
  7. data/lib/date_filters.rb +252 -0
  8. data/lib/dimensions.rb +276 -0
  9. data/lib/filters.rb +332 -0
  10. data/lib/jcsv.rb +107 -0
  11. data/lib/list_reader.rb +200 -0
  12. data/lib/locale.rb +192 -0
  13. data/lib/map_reader.rb +192 -0
  14. data/lib/mdarray-jcsv.rb +24 -0
  15. data/lib/mdarray_reader.rb +110 -0
  16. data/lib/numeric_filters.rb +225 -0
  17. data/lib/reader.rb +547 -0
  18. data/lib/supercsv_interface.rb +231 -0
  19. data/test/test_complete.rb +37 -0
  20. data/test/test_critbit.rb +442 -0
  21. data/test/test_customer_list.rb +436 -0
  22. data/test/test_customer_map.rb +209 -0
  23. data/test/test_customer_nhlist.rb +161 -0
  24. data/test/test_deep_map.rb +264 -0
  25. data/test/test_del.rb +73 -0
  26. data/test/test_dimensions.rb +231 -0
  27. data/test/test_example.rb +79 -0
  28. data/test/test_filters.rb +374 -0
  29. data/test/test_list_dimensions.rb +110 -0
  30. data/test/test_mdarray.rb +227 -0
  31. data/test/test_missing_data.rb +57 -0
  32. data/vendor/commons-beanutils-1.8.3.jar +0 -0
  33. data/vendor/commons-lang3-3.1.jar +0 -0
  34. data/vendor/dozer-5.4.0.jar +0 -0
  35. data/vendor/jcl-over-slf4j-1.6.6.jar +0 -0
  36. data/vendor/joda-time-2.7.jar +0 -0
  37. data/vendor/slf4j-api-1.7.5.jar +0 -0
  38. data/vendor/snakeyaml-1.14.jar +0 -0
  39. data/vendor/super-csv-2.4.0.jar +0 -0
  40. data/vendor/super-csv-dozer-2.4.0.jar +0 -0
  41. data/vendor/super-csv-java8-2.4.0.jar +0 -0
  42. data/vendor/super-csv-joda-2.4.0.jar +0 -0
  43. data/version.rb +2 -0
  44. metadata +196 -0
data/lib/map_reader.rb
@@ -0,0 +1,192 @@

# -*- coding: utf-8 -*-

##########################################################################################
# author Rodrigo Botafogo
#
# Copyright © 2015 Rodrigo Botafogo. All Rights Reserved. Permission to use, copy, modify,
# and distribute this software and its documentation, without fee and without a signed
# licensing agreement, is hereby granted, provided that the above copyright notice, this
# paragraph and the following two paragraphs appear in all copies, modifications, and
# distributions.
#
# IN NO EVENT SHALL RODRIGO BOTAFOGO BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL,
# INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF
# THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF RODRIGO BOTAFOGO HAS BEEN ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
#
# RODRIGO BOTAFOGO SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE
# SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED HEREUNDER IS PROVIDED "AS IS".
# RODRIGO BOTAFOGO HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS,
# OR MODIFICATIONS.
##########################################################################################

require 'critbit'

class Jcsv

  #========================================================================================
  #
  #========================================================================================

  class MapReader < Reader
    include_package "java.io"

    #-------------------------------------------------------------------------------------
    #
    #-------------------------------------------------------------------------------------

    def initialize(*params)
      super(*params)
      @column_mapping.mapping = @headers if !@dimensions
      @map_klass = (@format == :map)? Hash : Critbit
    end

    #-------------------------------------------------------------------------------------
    # Maps columns to the given names. In map reader, there is no column reordering, as
    # this does not really make any sense, since one gets to the data through the key and
    # not through its position in the array. If there are dimensions set, then every
    # dimension will map to true, in order for it to be properly processed by the parsing
    # method. Other fields can still be mapped to false, so that they are not read if
    # desired.
    #-------------------------------------------------------------------------------------

    def mapping=(column_mapping)

      @column_mapping.mapping ||= Array.new

      @headers.each_with_index do |h, i|
        next if @dimensions && !@dimensions[h].nil?
        name = column_mapping[h]
        raise ArgumentError.new("'true' is not allowed as a mapping: #{column_mapping}") if
          name == true
        @column_mapping.mapping[i] = (name.nil?)? h : name
      end

    end

    #-------------------------------------------------------------------------------------
    # read the file.
    #-------------------------------------------------------------------------------------

    def read(&block)

      # When no block given, chunks read are stored in an array and returned to the user.
      if (!block_given?)
        # if dimensions and chunk_size is 0, then do not wrap each row in an array, we
        # can access the data directly by using the dimension key
        if (@dimensions && @chunk_size == 0)
          rows = @map_klass.new
          parse_with_block do |line_no, row_no, chunk|
            rows.merge!(chunk)
          end
        else
          # chunk_size > 0, then each chunk should be a hash, and all chunks should
          # be wrapped inside an array
          rows = []
          parse_with_block do |line_no, row_no, chunk|
            rows << chunk
          end
        end
        rows
      else # block given
        parse_with_block(&block)
      end
    end

    #-------------------------------------------------------------------------------------
    #
    #-------------------------------------------------------------------------------------

    private

    #-------------------------------------------------------------------------------------
    #
    #-------------------------------------------------------------------------------------

    def new_reader(preferences)

      begin
        raise MissingHeadersError.new("Reading file as map requires headers.") if
          (!@headers && !@custom_headers)
        @reader = CMR.new(FileReader.new(@filename), preferences, @dimensions,
                          @suppress_warnings)
      rescue java.io.IOException => e
        raise IOError.new(e.message)
      end

    end

    #-------------------------------------------------------------------------------------
    #
    #-------------------------------------------------------------------------------------

    def format(chunk)
      chunk
    end

    #-------------------------------------------------------------------------------------
    # Maps columns to the given names. In map reader, there is no column reordering, as
    # this does not really make any sense, since one gets to the data through the key and
    # not through its position in the array. If there are dimensions set, then every
    # dimension will map to true, in order for it to be properly processed by the parsing
    # method. Other fields can still be mapped to false, so that they are not read if
    # desired.
    #-------------------------------------------------------------------------------------

    def assign_mapping(column_mapping)

      @column_mapping.mapping ||= Array.new

      @headers.each_with_index do |h, i|
        name = column_mapping[h]
        @column_mapping.mapping[i] = (name.nil?)? h : name
      end

    end

    #-------------------------------------------------------------------------------------
    # A chunk is either one row of the file, or an array with rows. One row can be either
    # a one dimensional array with all columns or a hash with all columns (excluding the
    # dimensions).
    #-------------------------------------------------------------------------------------

    def read_chunk

      if (@dimensions)
        if (@chunk_size == 0)
          row = @reader.read(@column_mapping, @filters)
          return (row.nil?)? nil : { row.delete(:key).join(".") => row }
        end

        rows = {}
        (1..@chunk_size).each do |i|
          if ((row = @reader.read(@column_mapping, @filters)).nil?)
            return (rows.size == 0)? nil : rows
          else
            if (@deep_map)
              key = row.delete(:key)
              key.reduce(rows) { |h,m| h[m] ||= {} }
              last = key.pop
              if (key.inject(rows, :fetch)[last] != {})
                # p "overriding value for key: #{chunk[:key]} with #{chunk}"
                raise DuplicateKeyError.new("Key #{row[:key]} not unique for this dataset. #{row}")
              end
              key.inject(rows, :fetch)[last] = row
            else # not a deep map
              key = row.delete(:key).join(".")
              raise DuplicateKeyError.new("Key #{key} not unique for this dataset. #{row}") if
                rows.has_key?(key)
              rows.merge!({key => row})
            end
          end
        end
        return rows
      else # no dimensions
        super
      end
    end

  end

end
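
A minimal usage sketch of the map reader added above, for context. It assumes the Jcsv.reader factory defined in data/lib/reader.rb accepts a filename plus format: and chunk_size: options matching the instance variables used here (@format, @chunk_size); the option names, and the customers.csv file with headers, are assumptions not confirmed by this diff.

# Hypothetical usage sketch -- the Jcsv.reader factory and its option names
# (format:, chunk_size:) are assumed from the instance variables above.
require 'mdarray-jcsv'

# Collect every chunk into an array of hashes keyed by header.
reader = Jcsv.reader("customers.csv", format: :map, chunk_size: 1)
rows = reader.read

# Or stream chunks through a block, as read(&block) above supports.
reader = Jcsv.reader("customers.csv", format: :map, chunk_size: 1)
reader.read do |line_no, row_no, chunk|
  puts "line #{line_no}: #{chunk.inspect}"
end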
data/lib/mdarray-jcsv.rb
@@ -0,0 +1,24 @@

# -*- coding: utf-8 -*-

##########################################################################################
# @author Rodrigo Botafogo
#
# Copyright © 2015 Rodrigo Botafogo. All Rights Reserved. Permission to use, copy, modify,
# and distribute this software and its documentation, without fee and without a signed
# licensing agreement, is hereby granted, provided that the above copyright notice, this
# paragraph and the following two paragraphs appear in all copies, modifications, and
# distributions.
#
# IN NO EVENT SHALL RODRIGO BOTAFOGO BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL,
# INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF
# THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF RODRIGO BOTAFOGO HAS BEEN ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
#
# RODRIGO BOTAFOGO SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE
# SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED HEREUNDER IS PROVIDED "AS IS".
# RODRIGO BOTAFOGO HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS,
# OR MODIFICATIONS.
##########################################################################################

require_relative 'jcsv'
data/lib/mdarray_reader.rb
@@ -0,0 +1,110 @@

# -*- coding: utf-8 -*-

##########################################################################################
# @author Rodrigo Botafogo
#
# Copyright © 2015 Rodrigo Botafogo. All Rights Reserved. Permission to use, copy, modify,
# and distribute this software and its documentation, without fee and without a signed
# licensing agreement, is hereby granted, provided that the above copyright notice, this
# paragraph and the following two paragraphs appear in all copies, modifications, and
# distributions.
#
# IN NO EVENT SHALL RODRIGO BOTAFOGO BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL,
# INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF
# THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF RODRIGO BOTAFOGO HAS BEEN ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
#
# RODRIGO BOTAFOGO SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE
# SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED HEREUNDER IS PROVIDED "AS IS".
# RODRIGO BOTAFOGO HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS,
# OR MODIFICATIONS.
##########################################################################################

require 'mdarray'

class Jcsv

  #========================================================================================
  #
  #========================================================================================

  class MDArrayReader < MapReader
    include_package "java.io"

    #-------------------------------------------------------------------------------------
    #
    #-------------------------------------------------------------------------------------

    def initialize(*params)

      filter = nil

      @dtype = params[1].delete(:dtype)

      case @dtype
      when :byte, :short, :int
        filter = Jcsv.int
      when :long
        filter = Jcsv.long
      when :float, :double
        filter = Jcsv.double
      else
        raise "Cannot create MDArray of dtype '#{@dtype}'"
      end

      params[1][:default_filter] = filter
      super(*params)

    end

    #-------------------------------------------------------------------------------------
    #
    #-------------------------------------------------------------------------------------

    def read
      to_mdarray(@dtype, super)
    end

    #-------------------------------------------------------------------------------------
    # Converts the data to an MDArray
    #-------------------------------------------------------------------------------------

    def to_mdarray(dtype, storage)

      raise "Cannot convert deep map into MDArray" if (@deep_map == true)

      prod = nil
      shape = []
      vector = []

      columns = @column_mapping.mapping - [true, false, nil]
      header_size = columns.size

      if (@dimensions.nil?)
        shape = [storage.size, header_size]
        storage.each do |line|
          vector.concat(line.values)
        end
      else
        @dimensions.dimensions_names.each do |name|
          keys = @dimensions[name].labels.keys
          shape << keys.size
          prod = (prod.nil?)? keys : prod.product(keys)
        end

        shape << header_size

        prod.each do |k|
          row = (@dimensions.dimensions_names.size > 1)? storage[k.flatten.join(".")] : storage[k]
          vector.concat(((row.nil?)? ([Float::NAN] * header_size) : row.values))
        end
      end

      array = MDArray.build(@dtype, shape, vector)

    end

  end

end
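
A short sketch of how MDArrayReader might be driven. The dtype: option is the one consumed by initialize above (params[1].delete(:dtype)); the format: :mdarray symbol, the Jcsv.reader entry point, and the measurements.csv file are assumptions, since they live in parts of the gem not shown in this diff.

# Hypothetical sketch: format: :mdarray and the Jcsv.reader signature are
# assumptions; dtype: is the option read by MDArrayReader#initialize above.
require 'mdarray-jcsv'

reader = Jcsv.reader("measurements.csv", format: :mdarray, dtype: :double)
array  = reader.read     # MDArray built by to_mdarray
p array.shape            # [rows, columns] when no dimensions are set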
data/lib/numeric_filters.rb
@@ -0,0 +1,225 @@

# -*- coding: utf-8 -*-

##########################################################################################
# @author Rodrigo Botafogo
#
# Copyright © 2015 Rodrigo Botafogo. All Rights Reserved. Permission to use, copy, modify,
# and distribute this software and its documentation, without fee and without a signed
# licensing agreement, is hereby granted, provided that the above copyright notice, this
# paragraph and the following two paragraphs appear in all copies, modifications, and
# distributions.
#
# IN NO EVENT SHALL RODRIGO BOTAFOGO BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL,
# INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF
# THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF RODRIGO BOTAFOGO HAS BEEN ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
#
# RODRIGO BOTAFOGO SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE
# SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED HEREUNDER IS PROVIDED "AS IS".
# RODRIGO BOTAFOGO HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS,
# OR MODIFICATIONS.
##########################################################################################

require 'bigdecimal'
require_relative 'locale'

class Jcsv
  # include_package "org.supercsv.cellprocessor"
  # include_package "org.supercsv.cellprocessor.constraint"

  #========================================================================================
  #
  #========================================================================================

  class RBParseInt < org.supercsv.cellprocessor.ParseInt
    include NextFilter

    def execute(value, context)
      begin
        exec_next(super(value, context), context)
      rescue org.supercsv.exception.SuperCsvCellProcessorException => e
        raise FilterError.new("#{e.message} in #{context}")
      end
    end

  end

  #========================================================================================
  #
  #========================================================================================

  class RBParseLong < org.supercsv.cellprocessor.ParseLong
    include NextFilter

    def execute(value, context)
      begin
        exec_next(super(value, context), context)
      rescue org.supercsv.exception.SuperCsvCellProcessorException => e
        raise FilterError, "#{e.message} in #{context}"
      end
    end

  end

  #========================================================================================
  #
  #========================================================================================

  class RBParseDouble < org.supercsv.cellprocessor.ParseDouble
    include NextFilter

    def execute(value, context)
      begin
        exec_next(super(value, context), context)
      rescue org.supercsv.exception.SuperCsvCellProcessorException => e
        raise FilterError, "#{e.message} in #{context}"
      end
    end

  end

  #========================================================================================
  #
  #========================================================================================

  class RBParseFloat < Filter

    attr_reader :locale
    attr_reader :dfs

    def initialize(locale)
      @locale = locale
      @dfs = DFSymbols.new(locale)
      @grouping_separator = @dfs.grouping_separator
      @decimal_separator = @dfs.decimal_separator
      super()
    end

    def execute(value, context)
      validateInputNotNull(value, context)
      value = value.gsub(@grouping_separator.chr, "").
                gsub(@decimal_separator.chr, ".").to_f
      exec_next(value, context)
    end

  end

  #========================================================================================
  #
  #========================================================================================

  class RBParseBignum < Filter

    def execute(value, context)
      validateInputNotNull(value, context)
      exec_next(value.to_i, context)
    end

  end

  #========================================================================================
  #
  #========================================================================================

  class RBParseComplex < Filter

    def execute(value, context)
      validateInputNotNull(value, context)
      exec_next(value.to_c, context)
    end

  end

  #========================================================================================
  #
  #========================================================================================

  class RBParseRational < Filter

    def execute(value, context)
      validateInputNotNull(value, context)
      exec_next(value.to_r, context)
    end

  end

  #========================================================================================
  #
  #========================================================================================

  class RBParseBigDecimal < Filter

    attr_reader :locale
    attr_reader :dfs

    def initialize(locale)
      @locale = locale
      @dfs = DFSymbols.new(locale)
      @grouping_separator = @dfs.grouping_separator
      @decimal_separator = @dfs.decimal_separator
      super()
    end

    def execute(value, context)
      validateInputNotNull(value, context)
      # raise "BigDecimal expects a String as input not #{value}" if !(value.is_a? String)
      bd = BigDecimal.new(value.gsub(@grouping_separator.chr, "").
                          gsub(@decimal_separator.chr, "."))
      exec_next(bd, context)
    end

  end

  #========================================================================================
  #
  #========================================================================================

  def self.int
    RBParseInt.new
  end

  def self.long
    RBParseLong.new
  end

  def self.double
    RBParseDouble.new
  end

  def self.fixnum
    RBParseBignum.new
  end

  def self.float(locale = Locale.default)
    RBParseFloat.new(locale)
  end

  def self.complex
    RBParseComplex.new
  end

  def self.rational
    RBParseRational.new
  end

  def self.bignum
    RBParseBignum.new
  end

  #---------------------------------------------------------------------------------------
  # Convert a String to a BigDecimal. It uses the String constructor of BigDecimal
  # (new BigDecimal("0.1")) as it yields predictable results (see BigDecimal).
  # If the data uses a character other than "." as a decimal separator (Germany uses ","
  # for example), then use the constructor that accepts a DecimalFormatSymbols object, as
  # it will convert the character to a "." before creating the BigDecimal. Likewise if the
  # data contains a grouping separator (Germany uses "." for example) then supplying a
  # DecimalFormatSymbols object will allow grouping separators to be removed before
  # parsing.
  #---------------------------------------------------------------------------------------

  def self.big_decimal(locale = Locale.default)
    Jcsv::RBParseBigDecimal.new(locale)
  end

end
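
The filters above either wrap Super CSV cell processors (ParseInt, ParseLong, ParseDouble) or parse locale-aware Ruby numerics directly, and are exposed through the Jcsv.int, Jcsv.long, Jcsv.double, Jcsv.float, Jcsv.big_decimal and related factory methods. A sketch of attaching them per column follows; the filters= accessor belongs to data/lib/reader.rb (not part of this diff), and the column keys, their symbolized form, and the file name are made up for illustration.

# Hypothetical sketch: reader.filters= comes from reader.rb (not shown) and
# the symbolized column keys are an assumption about header handling.
require 'mdarray-jcsv'

reader = Jcsv.reader("sales.csv", format: :map)
reader.filters = {
  quantity:   Jcsv.int,          # RBParseInt, wraps org.supercsv ParseInt
  unit_price: Jcsv.big_decimal,  # locale-aware BigDecimal (default locale)
  total:      Jcsv.double        # RBParseDouble, wraps org.supercsv ParseDouble
}
rows = reader.read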