mdarray-jcsv 0.6.3-java

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44) hide show
  1. checksums.yaml +7 -0
  2. data/LICENSE.txt +23 -0
  3. data/README.md +2 -0
  4. data/Rakefile +46 -0
  5. data/config.rb +104 -0
  6. data/lib/constraints.rb +205 -0
  7. data/lib/date_filters.rb +252 -0
  8. data/lib/dimensions.rb +276 -0
  9. data/lib/filters.rb +332 -0
  10. data/lib/jcsv.rb +107 -0
  11. data/lib/list_reader.rb +200 -0
  12. data/lib/locale.rb +192 -0
  13. data/lib/map_reader.rb +192 -0
  14. data/lib/mdarray-jcsv.rb +24 -0
  15. data/lib/mdarray_reader.rb +110 -0
  16. data/lib/numeric_filters.rb +225 -0
  17. data/lib/reader.rb +547 -0
  18. data/lib/supercsv_interface.rb +231 -0
  19. data/test/test_complete.rb +37 -0
  20. data/test/test_critbit.rb +442 -0
  21. data/test/test_customer_list.rb +436 -0
  22. data/test/test_customer_map.rb +209 -0
  23. data/test/test_customer_nhlist.rb +161 -0
  24. data/test/test_deep_map.rb +264 -0
  25. data/test/test_del.rb +73 -0
  26. data/test/test_dimensions.rb +231 -0
  27. data/test/test_example.rb +79 -0
  28. data/test/test_filters.rb +374 -0
  29. data/test/test_list_dimensions.rb +110 -0
  30. data/test/test_mdarray.rb +227 -0
  31. data/test/test_missing_data.rb +57 -0
  32. data/vendor/commons-beanutils-1.8.3.jar +0 -0
  33. data/vendor/commons-lang3-3.1.jar +0 -0
  34. data/vendor/dozer-5.4.0.jar +0 -0
  35. data/vendor/jcl-over-slf4j-1.6.6.jar +0 -0
  36. data/vendor/joda-time-2.7.jar +0 -0
  37. data/vendor/slf4j-api-1.7.5.jar +0 -0
  38. data/vendor/snakeyaml-1.14.jar +0 -0
  39. data/vendor/super-csv-2.4.0.jar +0 -0
  40. data/vendor/super-csv-dozer-2.4.0.jar +0 -0
  41. data/vendor/super-csv-java8-2.4.0.jar +0 -0
  42. data/vendor/super-csv-joda-2.4.0.jar +0 -0
  43. data/version.rb +2 -0
  44. metadata +196 -0
@@ -0,0 +1,161 @@
1
+ # -*- coding: utf-8 -*-
2
+
3
+ ##########################################################################################
4
+ # Copyright © 2015 Rodrigo Botafogo. All Rights Reserved. Permission to use, copy, modify,
5
+ # and distribute this software and its documentation for educational, research, and
6
+ # not-for-profit purposes, without fee and without a signed licensing agreement, is hereby
7
+ # granted, provided that the above copyright notice, this paragraph and the following two
8
+ # paragraphs appear in all copies, modifications, and distributions. Contact Rodrigo
9
+ # Botafogo - rodrigo.a.botafogo@gmail.com for commercial licensing opportunities.
10
+ #
11
+ # IN NO EVENT SHALL RODRIGO BOTAFOGO BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL,
12
+ # INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF
13
+ # THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF RODRIGO BOTAFOGO HAS BEEN ADVISED OF THE
14
+ # POSSIBILITY OF SUCH DAMAGE.
15
+ #
16
+ # RODRIGO BOTAFOGO SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
17
+ # THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE
18
+ # SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED HEREUNDER IS PROVIDED "AS IS".
19
+ # RODRIGO BOTAFOGO HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS,
20
+ # OR MODIFICATIONS.
21
+ ##########################################################################################
22
+
23
+ require 'rubygems'
24
+ require 'test/unit'
25
+ require 'shoulda'
26
+
27
+ require_relative '../config'
28
+
29
+ require 'jcsv'
30
+
31
+ class CSVTest < Test::Unit::TestCase
32
+
33
+ context "CSV test" do
34
+
35
+ setup do
36
+
37
+ end
38
+
39
+ #-------------------------------------------------------------------------------------
40
+ #
41
+ #-------------------------------------------------------------------------------------
42
+
43
+ should "parse a csv file the quick way without headers" do
44
+
45
+ # Setting headers to false, will read the header as a normal line
46
+ reader = Jcsv.reader("../data/customer_nh.csv", headers: false)
47
+
48
+ # read the whole file in one piece.
49
+ content = reader.read
50
+ # p content
51
+
52
+ assert_equal(["1", "John", "Dunbar", "13/06/1945",
53
+ "1600 Amphitheatre Parkway\nMountain View, CA 94043\nUnited States",
54
+ nil, nil, "\"May the Force be with you.\" - Star Wars",
55
+ "jdunbar@gmail.com", "0"], content[0])
56
+ end
57
+
58
+ #-------------------------------------------------------------------------------------
59
+ #
60
+ #-------------------------------------------------------------------------------------
61
+
62
+ should "process headerless files with filters" do
63
+
64
+ # Setting headers to false, will read the header as a normal line
65
+ reader = Jcsv.reader("../data/customer_nh.csv", headers: false)
66
+
67
+ # Filters need to match the column by position, since there is no header to allow
68
+ # matching by names. Columns indexed after the last filter will not be filtered
69
+ # in any way. In the example below, no filter will be applied on column 5 and
70
+ # after
71
+ reader.filters = [Jcsv.optional >> Jcsv.int, Jcsv.not_nil, Jcsv.not_nil,
72
+ Jcsv.optional >> Jcsv.date("dd/MM/yyyy")]
73
+
74
+ # read the whole file in one piece.
75
+ content = reader.read
76
+ assert_equal(1, content[0][0])
77
+ assert_equal(DateTime.parse("13/06/1945"), content[0][3])
78
+
79
+ end
80
+
81
+ #-------------------------------------------------------------------------------------
82
+ #
83
+ #-------------------------------------------------------------------------------------
84
+
85
+ should "allow adding custom headers to headerless files" do
86
+
87
+ # Setting headers to false, will read the header as a normal line
88
+ reader = Jcsv.reader("../data/customer_nh.csv", headers: false,
89
+ custom_headers:
90
+ ["customerNo", "firstName", "lastName", "birthDate",
91
+ "mailingAddress", "married", "numberOfKids",
92
+ "favouriteQuote", "email", "loyaltyPoints"])
93
+
94
+ # Add filters, so that we get 'objects' instead of strings for filtered fields
95
+ reader.filters = {:number_of_kids => Jcsv.optional >> Jcsv.int,
96
+ :married => Jcsv.optional >> Jcsv.bool,
97
+ :customer_no => Jcsv.int,
98
+ :birth_date => Jcsv.date("dd/MM/yyyy")}
99
+
100
+ reader.read do |line_no, row_no, row, headers|
101
+
102
+ # First field is customer number, which is converted to int
103
+ assert_equal(1, row[0]) if row_no == 1
104
+ assert_equal("John", row[1]) if row_no == 1
105
+ # Field 5 is :married. It is optional, so leaving it blank (nil) is ok.
106
+ assert_equal(nil, row[5]) if row_no == 1
107
+
108
+ # notice that field married that was "Y" is now true. Number of kids is not "0",
109
+ # but 0, customerNo is also an int
110
+ assert_equal(true, row[5]) if row_no == 2
111
+
112
+ end
113
+
114
+ end
115
+
116
+ #-------------------------------------------------------------------------------------
117
+ #
118
+ #-------------------------------------------------------------------------------------
119
+
120
+ should "Read headerless files with map if given custom_headers" do
121
+
122
+ # Setting headers to false, will read the header as a normal line
123
+ reader = Jcsv.reader("../data/customer_nh.csv", headers: false, format: :map,
124
+ custom_headers:
125
+ ["customerNo", "firstName", "lastName", "birthDate",
126
+ "mailingAddress", "married", "numberOfKids",
127
+ "favouriteQuote", "email", "loyaltyPoints"],
128
+ default_filter: Jcsv.not_nil)
129
+
130
+ # Set numberOfKids and married as optional, otherwise an exception will be raised
131
+ reader.filters = {:number_of_kids => Jcsv.optional >> Jcsv.int,
132
+ :married => Jcsv.optional >> Jcsv.bool,
133
+ :loyalty_points => Jcsv.long,
134
+ :customer_no => Jcsv.int,
135
+ :birth_date => Jcsv.date("dd/MM/yyyy")}
136
+
137
+ # When parsing to map, it is possible to make a mapping. If a column is mapped to false
138
+ # the column will be removed from the returned row
139
+ reader.mapping = {:number_of_kids => :numero_criancas,
140
+ :married => "casado",
141
+ :loyalty_points => "pontos fidelidade",
142
+ :customer_no => false}
143
+
144
+ reader.read do |line_no, row_no, row, headers|
145
+ if (row_no == 5)
146
+ assert_equal(nil, row[:customer_no])
147
+ assert_equal("Bill", row[:first_name])
148
+ assert_equal(true, row["casado"])
149
+ assert_equal("1973-07-10T00:00:00+00:00", row[:birth_date].to_s)
150
+ assert_equal("2701 San Tomas Expressway\nSanta Clara, CA 95050\nUnited States",
151
+ row[:mailing_address])
152
+ assert_equal(3, row[:numero_criancas])
153
+ end
154
+
155
+ end
156
+
157
+ end
158
+
159
+ end
160
+
161
+ end
@@ -0,0 +1,264 @@
1
+ # -*- coding: utf-8 -*-
2
+
3
+ ##########################################################################################
4
+ # Copyright © 2015 Rodrigo Botafogo. All Rights Reserved. Permission to use, copy, modify,
5
+ # and distribute this software and its documentation for educational, research, and
6
+ # not-for-profit purposes, without fee and without a signed licensing agreement, is hereby
7
+ # granted, provided that the above copyright notice, this paragraph and the following two
8
+ # paragraphs appear in all copies, modifications, and distributions. Contact Rodrigo
9
+ # Botafogo - rodrigo.a.botafogo@gmail.com for commercial licensing opportunities.
10
+ #
11
+ # IN NO EVENT SHALL RODRIGO BOTAFOGO BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL,
12
+ # INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF
13
+ # THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF RODRIGO BOTAFOGO HAS BEEN ADVISED OF THE
14
+ # POSSIBILITY OF SUCH DAMAGE.
15
+ #
16
+ # RODRIGO BOTAFOGO SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
17
+ # THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE
18
+ # SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED HEREUNDER IS PROVIDED "AS IS".
19
+ # RODRIGO BOTAFOGO HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS,
20
+ # OR MODIFICATIONS.
21
+ ##########################################################################################
22
+
23
+ require 'rubygems'
24
+ require 'test/unit'
25
+ require 'shoulda'
26
+ require 'matrix'
27
+
28
+ require_relative '../config'
29
+
30
+ require 'jcsv'
31
+
32
+ class CSVTest < Test::Unit::TestCase
33
+
34
+ context "CSV test" do
35
+
36
+ setup do
37
+
38
+ end
39
+
40
+ #-------------------------------------------------------------------------------------
41
+ # When reading the CSV file in one big chunk and selecting deep_map: true, then each
42
+ # dimension will be hashed across all rows. [This is not clear at all!!! IMPROVE.]
43
+ #-------------------------------------------------------------------------------------
44
+
45
+ should "parse multi-dimension csv file to map, chuk_size all and deep_map true" do
46
+
47
+ reader = Jcsv.reader("../data/epilepsy.csv", format: :map, chunk_size: :all,
48
+ dimensions: [:treatment, :subject, :period], deep_map: true)
49
+
50
+ # remove the :patient field from the data, as this field is already given by the
51
+ # :subject field.
52
+ reader.mapping = {:patient => false}
53
+
54
+ # since we are reading with chunk_size = :all, then we will only get one chunk back.
55
+ # Then we can get the first chunk by indexing read with 0: reader.read[0]
56
+ treatment = reader.read[0]
57
+ # p treatment
58
+
59
+ # get the dimensions
60
+ treatment_type = reader.dimensions[:treatment]
61
+ subject = reader.dimensions[:subject]
62
+ period = reader.dimensions[:period]
63
+
64
+ # variable labels has all dimension labels
65
+ assert_equal(0, treatment_type.labels["placebo"])
66
+ assert_equal(1, treatment_type.labels["Progabide"])
67
+ assert_equal(1, subject.labels["2"])
68
+ assert_equal(13, subject.labels["14"])
69
+ assert_equal(58, subject.labels["59"])
70
+ assert_equal(0, period.labels["1"])
71
+ assert_equal(3, period.labels["4"])
72
+
73
+ assert_equal("14", treatment["placebo"]["10"]["1"][:"seizure.rate"])
74
+
75
+ end
76
+
77
+ #-------------------------------------------------------------------------------------
78
+ #
79
+ #-------------------------------------------------------------------------------------
80
+
81
+ should "read data with dimensions, mapping and filters" do
82
+
83
+ reader = Jcsv.reader("../data/epilepsy.csv", format: :map, chunk_size: :all,
84
+ dimensions: [:treatment, :subject, :period], deep_map: true,
85
+ default_filter: Jcsv.int)
86
+
87
+ # remove the :patient field from the data, as this field is already given by the
88
+ # :subject field.
89
+ reader.mapping = {:patient => false}
90
+ reader.filters = {:"seizure.rate" => Jcsv.float}
91
+
92
+ # since we are reading with chunk_size = :all, read returns a single chunk, which we
93
+ # get by indexing with 0. No exception is expected with these three dimensions.
94
+ treatment = reader.read[0]
95
+ # p treatment
96
+
97
+ assert_equal(14.0, treatment["placebo"]["10"]["1"][:"seizure.rate"])
98
+ assert_equal(19.0, treatment["Progabide"]["45"]["1"][:"seizure.rate"])
99
+
100
+ end
101
+
102
+ #-------------------------------------------------------------------------------------
103
+ #
104
+ #-------------------------------------------------------------------------------------
105
+
106
+ should "read data with deep_map but chunk_size not all" do
107
+
108
+ reader = Jcsv.reader("../data/epilepsy.csv", format: :map, chunk_size: 20,
109
+ dimensions: [:treatment, :subject, :period], deep_map: true,
110
+ default_filter: Jcsv.int)
111
+
112
+ # remove the :patient field from the data, as this field is already given by the
113
+ # :subject field.
114
+ reader.mapping = {:patient => false}
115
+ reader.filters = {:"seizure.rate" => Jcsv.float}
116
+
117
+ # with chunk_size 20, read returns an array of chunks; each chunk is a deep map built
118
+ # from (at most) 20 rows. No exception is expected here.
119
+ treatment = reader.read
120
+
121
+ assert_equal(3.0, treatment[0]["placebo"]["2"]["1"][:"seizure.rate"])
122
+ # since only 20 rows read per chunk, there is no Progabide row yet. Note that there
123
+ # was data in the test above
124
+ assert_equal(nil, treatment[0]["Progabide"])
125
+
126
+ # chunk 10, has Progabide as a dimension
127
+ assert_equal(6.0, treatment[10]["Progabide"]["51"]["2"][:"seizure.rate"])
128
+
129
+ end
130
+
131
+ #-------------------------------------------------------------------------------------
132
+ #
133
+ #-------------------------------------------------------------------------------------
134
+
135
+ should "raise exception if key is repeated" do
136
+
137
+ reader = Jcsv.reader("../data/epilepsy.csv", format: :map, chunk_size: :all,
138
+ dimensions: [:period], deep_map: true)
139
+
140
+ # will raise an exception as :period is not a key. Will break as soon as we read the
141
+ # first period for the second user
142
+ assert_raise ( Jcsv::DuplicateKeyError ) { reader.read[0] }
143
+
144
+ end
145
+
146
+ #-------------------------------------------------------------------------------------
147
+ # When reading the CSV file in one big chunk and selecting deep_map: true, then each
148
+ # dimension will be hashed across all rows. [This is not clear at all!!! IMPROVE.]
149
+ #-------------------------------------------------------------------------------------
150
+
151
+ should "Show errors when dimensions are not in order or missing" do
152
+
153
+ reader = Jcsv.reader("../data/epilepsy.csv", format: :map, chunk_size: :all,
154
+ dimensions: [:period, :treatment, :subject], deep_map: true)
155
+
156
+ p "LOTS OF ERROR MESSAGES EXPECTED FROM HERE..."
157
+
158
+ # remove the :patient field from the data, as this field is already given by the
159
+ # :subject field.
160
+ reader.mapping = {:patient => false}
161
+
162
+ # since we are reading with chunk_size = :all, then we will only get one chunk back.
163
+ # Then we can get the first chunk by indexing read with 0: reader.read[0]
164
+ treatment = reader.read[0]
165
+
166
+ p "... TO HERE. If no error messages, then something is wrong!"
167
+
168
+ end
169
+
170
+ #-------------------------------------------------------------------------------------
171
+ # When reading the CSV file in one big chunk and selecting deep_map: true, then each
172
+ # dimension will be hashed across all rows. [This is not clear at all!!! IMPROVE.]
173
+ #-------------------------------------------------------------------------------------
174
+
175
+ should "Suppress warnings when dimensions are not in order or missing" do
176
+
177
+ reader = Jcsv.reader("../data/epilepsy.csv", format: :map, chunk_size: :all,
178
+ dimensions: [:period, :treatment, :subject], deep_map: true,
179
+ suppress_warnings: true)
180
+
181
+ p "No warning messages should be seen from here..."
182
+
183
+ # remove the :patient field from the data, as this field is already given by the
184
+ # :subject field.
185
+ reader.mapping = {:patient => false}
186
+
187
+ # since we are reading with chunk_size = :all, then we will only get one chunk back.
188
+ # Then we can get the first chunk by indexing read with 0: reader.read[0]
189
+ treatment = reader.read
190
+ # p treatment
191
+
192
+ p "... to here. If there are any warning messages then there is something wrong!"
193
+
194
+ end
195
+
196
+ #-------------------------------------------------------------------------------------
197
+ # There is a large difference when parsing multidimensional CSV files with chunks and
198
+ # no chunks. When no chunks are selected, this is identical to normal dimension
199
+ # reading.
200
+ #-------------------------------------------------------------------------------------
201
+
202
+ should "parse multi-dimension csv file to map no chunk" do
203
+
204
+ reader = Jcsv.reader("../data/epilepsy.csv", format: :map,
205
+ dimensions: [:treatment, :subject, :period], deep_map: true)
206
+
207
+ # remove the :patient field from the data, as this field is already given by the
208
+ # :subject field.
209
+ reader.mapping = {:patient => false}
210
+
211
+ # no chunk_size is given here, so read returns a single map whose keys are the
212
+ # dimension labels joined by '.', e.g. "placebo.1.1"
213
+ treatment = reader.read
214
+ # p treatment
215
+
216
+ assert_equal("11", treatment["placebo.1.1"][:base])
217
+ assert_equal("31", treatment["placebo.1.1"][:age])
218
+ assert_equal("5", treatment["placebo.1.1"][:"seizure.rate"])
219
+
220
+ assert_equal("11", treatment["placebo.1.2"][:base])
221
+ assert_equal("31", treatment["placebo.1.2"][:age])
222
+ assert_equal("3", treatment["placebo.1.2"][:"seizure.rate"])
223
+
224
+ end
225
+
226
+ #-------------------------------------------------------------------------------------
227
+ # Most examples above use chunk_size :all, but chunks can be smaller. In this
228
+ # example, chunk_size is 20 and it is processed by a block
229
+ #-------------------------------------------------------------------------------------
230
+
231
+ should "read with dimension and given a block" do
232
+
233
+ reader = Jcsv.reader("../data/epilepsy.csv", format: :map, chunk_size: 20,
234
+ dimensions: [:treatment, :subject, :period], deep_map: true,
235
+ default_filter: Jcsv.int)
236
+
237
+ reader.mapping = {:patient => false}
238
+
239
+ reader.read do |line_no, row_no, chunk|
240
+ p line_no
241
+ p row_no
242
+ p chunk
243
+ end
244
+
245
+ end
246
+ =begin
247
+ #-------------------------------------------------------------------------------------
248
+ #
249
+ #-------------------------------------------------------------------------------------
250
+
251
+ should "read dimensions to lists" do
252
+
253
+ reader = Jcsv.reader("epilepsy.csv", chunk_size: :all, deep_map: true,
254
+ dimensions: [:treatment, :subject, :period])
255
+
256
+ table = reader.read
257
+ # p table
258
+
259
+ end
260
+ =end
261
+
262
+ end
263
+
264
+ end
@@ -0,0 +1,73 @@
1
+ hash = {}
2
+
3
+ key = "placebo.john.1"
4
+ key.split('.').reduce(hash) { |h,m| h[m] ||= {} }
5
+
6
+ *key, last = key.split(".")
7
+ key.inject(hash, :fetch)[last] = {a: 1, b:2, c: 3}
8
+
9
+ key = "placebo.john.2"
10
+ key.split('.').reduce(hash) { |h,m| h[m] ||= {} }
11
+
12
+ *key, last = key.split(".")
13
+ key.inject(hash, :fetch)[last] = {a: 10, b:20, c: 30}
14
+
15
+ puts hash #=> {"placebo"=>{"john"=>{"1"=>{:a=>1, :b=>2, :c=>3}, "2"=>{:a=>10, :b=>20, :c=>30}}}}
16
+ p hash["placebo"]["john"]["2"]
17
+
18
+ =begin
19
+
20
+ require 'hashie'
21
+
22
+ cl = Hashie::Clash.new
23
+
24
+ cl.placebo!.john!.p1(a: 1, b: 2, c: 3)
25
+ #cl.placebo!.john!.p2(a: 10, b: 20, c: 30)
26
+
27
+ p cl
28
+
29
+ =end
30
+
31
+
32
+
33
+ rh = Hash.new {|h,k| h[k] = Hash.new(&h.default_proc) }
34
+
35
+ h = Hash.new
36
+
37
+ =begin
38
+ h["placebo"] ||= Hash.new
39
+ h["med"] ||= Hash.new
40
+ h["placebo"]["john"] ||= Hash.new
41
+ h["placebo"]["john"][1] ||= Hash.new
42
+
43
+ h["placebo"] ||= Hash.new
44
+ h["placebo"]["john"] ||= Hash.new
45
+ h["placebo"]["john"][2] ||= Hash.new
46
+
47
+ h["placebo"]["john"][1] = {a: 1, b: 2, c: 3}
48
+ h["placebo"]["john"][2] = {a: 2, b: 10, c: 50}
49
+
50
+ p h["placebo"]
51
+ =end
52
+
53
+ =begin
54
+ h["placebo"] ||= Hash.new
55
+ h["placebo"]["john"] ||= Hash.new
56
+ h["placebo"]["john"]["1"] ||= Hash.new
57
+
58
+ key = "placebo.john.1"
59
+
60
+ *key, last = key.split(".")
61
+ key.inject(h, :fetch)[last] = {a: 1, b:2, c: 3}
62
+
63
+ h["placebo"] ||= Hash.new
64
+ h["placebo"]["john"] ||= Hash.new
65
+ h["placebo"]["john"]["2"] ||= Hash.new
66
+
67
+ key = "placebo.john.2"
68
+
69
+ *key, last = key.split(".")
70
+ key.inject(h, :fetch)[last] = {a: 10, b:20, c: 30}
71
+
72
+ p h["placebo"]["john"]["2"]
73
+ =end