mdarray-jcsv 0.6.3-java

Files changed (44)
  1. checksums.yaml +7 -0
  2. data/LICENSE.txt +23 -0
  3. data/README.md +2 -0
  4. data/Rakefile +46 -0
  5. data/config.rb +104 -0
  6. data/lib/constraints.rb +205 -0
  7. data/lib/date_filters.rb +252 -0
  8. data/lib/dimensions.rb +276 -0
  9. data/lib/filters.rb +332 -0
  10. data/lib/jcsv.rb +107 -0
  11. data/lib/list_reader.rb +200 -0
  12. data/lib/locale.rb +192 -0
  13. data/lib/map_reader.rb +192 -0
  14. data/lib/mdarray-jcsv.rb +24 -0
  15. data/lib/mdarray_reader.rb +110 -0
  16. data/lib/numeric_filters.rb +225 -0
  17. data/lib/reader.rb +547 -0
  18. data/lib/supercsv_interface.rb +231 -0
  19. data/test/test_complete.rb +37 -0
  20. data/test/test_critbit.rb +442 -0
  21. data/test/test_customer_list.rb +436 -0
  22. data/test/test_customer_map.rb +209 -0
  23. data/test/test_customer_nhlist.rb +161 -0
  24. data/test/test_deep_map.rb +264 -0
  25. data/test/test_del.rb +73 -0
  26. data/test/test_dimensions.rb +231 -0
  27. data/test/test_example.rb +79 -0
  28. data/test/test_filters.rb +374 -0
  29. data/test/test_list_dimensions.rb +110 -0
  30. data/test/test_mdarray.rb +227 -0
  31. data/test/test_missing_data.rb +57 -0
  32. data/vendor/commons-beanutils-1.8.3.jar +0 -0
  33. data/vendor/commons-lang3-3.1.jar +0 -0
  34. data/vendor/dozer-5.4.0.jar +0 -0
  35. data/vendor/jcl-over-slf4j-1.6.6.jar +0 -0
  36. data/vendor/joda-time-2.7.jar +0 -0
  37. data/vendor/slf4j-api-1.7.5.jar +0 -0
  38. data/vendor/snakeyaml-1.14.jar +0 -0
  39. data/vendor/super-csv-2.4.0.jar +0 -0
  40. data/vendor/super-csv-dozer-2.4.0.jar +0 -0
  41. data/vendor/super-csv-java8-2.4.0.jar +0 -0
  42. data/vendor/super-csv-joda-2.4.0.jar +0 -0
  43. data/version.rb +2 -0
  44. metadata +196 -0
data/test/test_customer_nhlist.rb
@@ -0,0 +1,161 @@
+ # -*- coding: utf-8 -*-
+
+ ##########################################################################################
+ # Copyright © 2015 Rodrigo Botafogo. All Rights Reserved. Permission to use, copy, modify,
+ # and distribute this software and its documentation for educational, research, and
+ # not-for-profit purposes, without fee and without a signed licensing agreement, is hereby
+ # granted, provided that the above copyright notice, this paragraph and the following two
+ # paragraphs appear in all copies, modifications, and distributions. Contact Rodrigo
+ # Botafogo - rodrigo.a.botafogo@gmail.com for commercial licensing opportunities.
+ #
+ # IN NO EVENT SHALL RODRIGO BOTAFOGO BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL,
+ # INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF
+ # THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF RODRIGO BOTAFOGO HAS BEEN ADVISED OF THE
+ # POSSIBILITY OF SUCH DAMAGE.
+ #
+ # RODRIGO BOTAFOGO SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ # THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE
+ # SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED HEREUNDER IS PROVIDED "AS IS".
+ # RODRIGO BOTAFOGO HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS,
+ # OR MODIFICATIONS.
+ ##########################################################################################
+
+ require 'rubygems'
+ require 'test/unit'
+ require 'shoulda'
+
+ require_relative '../config'
+
+ require 'jcsv'
+
+ class CSVTest < Test::Unit::TestCase
+
+   context "CSV test" do
+
+     setup do
+
+     end
+
+     #-------------------------------------------------------------------------------------
+     #
+     #-------------------------------------------------------------------------------------
+
+ should "parse a csv file the quick way without headers" do
44
+
45
+ # Setting headers to false, will read the header as a normal line
46
+ reader = Jcsv.reader("../data/customer_nh.csv", headers: false)
47
+
48
+ # read the whole file in one piece.
49
+ content = reader.read
50
+ # p content
51
+
52
+ assert_equal(["1", "John", "Dunbar", "13/06/1945",
53
+ "1600 Amphitheatre Parkway\nMountain View, CA 94043\nUnited States",
54
+ nil, nil, "\"May the Force be with you.\" - Star Wars",
55
+ "jdunbar@gmail.com", "0"], content[0])
56
+ end
57
+
58
+ #-------------------------------------------------------------------------------------
59
+ #
60
+ #-------------------------------------------------------------------------------------
61
+
62
+ should "process headerless files with filters" do
63
+
64
+ # Setting headers to false, will read the header as a normal line
65
+ reader = Jcsv.reader("../data/customer_nh.csv", headers: false)
66
+
67
+ # Filters need to match the column by position, since there is no header to allow
68
+ # matching by names. Columns indexed after the last filter will not be filtered
69
+ # in any way. In the example bellow, no filter will be applied on column 5 and
70
+ # after
71
+ reader.filters = [Jcsv.optional >> Jcsv.int, Jcsv.not_nil, Jcsv.not_nil,
72
+ Jcsv.optional >> Jcsv.date("dd/MM/yyyy")]
73
+
74
+ # read the whole file in one piece.
75
+ content = reader.read
76
+ assert_equal(1, content[0][0])
77
+ assert_equal(DateTime.parse("13/06/1945"), content[0][3])
78
+
79
+ end
80
+
81
+ #-------------------------------------------------------------------------------------
82
+ #
83
+ #-------------------------------------------------------------------------------------
84
+
85
+ should "allow adding custom headers to headerless files" do
86
+
87
+ # Setting headers to false, will read the header as a normal line
88
+ reader = Jcsv.reader("../data/customer_nh.csv", headers: false,
89
+ custom_headers:
90
+ ["customerNo", "firstName", "lastName", "birthDate",
91
+ "mailingAddress", "married", "numberOfKids",
92
+ "favouriteQuote", "email", "loyaltyPoints"])
93
+
94
+ # Add filters, so that we get 'objects' instead of strings for filtered fields
95
+ reader.filters = {:number_of_kids => Jcsv.optional >> Jcsv.int,
96
+ :married => Jcsv.optional >> Jcsv.bool,
97
+ :customer_no => Jcsv.int,
98
+ :birth_date => Jcsv.date("dd/MM/yyyy")}
99
+
100
+ reader.read do |line_no, row_no, row, headers|
101
+
102
+ # First field is customer number, which is converted to int
103
+ assert_equal(1, row[0]) if row_no == 1
104
+ assert_equal("John", row[1]) if row_no == 1
105
+ # Field 5 is :married. It is optional, so leaving it blank (nil) is ok.
106
+ assert_equal(nil, row[5]) if row_no == 1
107
+
108
+ # notice that field married that was "Y" is now true. Number of kids is not "0",
109
+ # but 0, customerNo is also and int
110
+ assert_equal(true, row[5]) if row_no == 2
111
+
112
+ end
113
+
114
+ end
115
+
+     #-------------------------------------------------------------------------------------
+     #
+     #-------------------------------------------------------------------------------------
+
+     should "Read headerless files with map if given custom_headers" do
+
+       # Setting headers to false reads the header line as a normal data line.
+       reader = Jcsv.reader("../data/customer_nh.csv", headers: false, format: :map,
+                            custom_headers:
+                              ["customerNo", "firstName", "lastName", "birthDate",
+                               "mailingAddress", "married", "numberOfKids",
+                               "favouriteQuote", "email", "loyaltyPoints"],
+                            default_filter: Jcsv.not_nil)
+
+       # Set numberOfKids and married as optional, otherwise an exception will be raised.
+       reader.filters = {:number_of_kids => Jcsv.optional >> Jcsv.int,
+                         :married => Jcsv.optional >> Jcsv.bool,
+                         :loyalty_points => Jcsv.long,
+                         :customer_no => Jcsv.int,
+                         :birth_date => Jcsv.date("dd/MM/yyyy")}
+
+       # When parsing to a map, column names can be remapped. If a column is mapped to
+       # false, it is removed from the returned row.
+       reader.mapping = {:number_of_kids => :numero_criancas,
+                         :married => "casado",
+                         :loyalty_points => "pontos fidelidade",
+                         :customer_no => false}
+
+       reader.read do |line_no, row_no, row, headers|
+         if (row_no == 5)
+           assert_equal(nil, row[:customer_no])
+           assert_equal("Bill", row[:first_name])
+           assert_equal(true, row["casado"])
+           assert_equal("1973-07-10T00:00:00+00:00", row[:birth_date].to_s)
+           assert_equal("2701 San Tomas Expressway\nSanta Clara, CA 95050\nUnited States",
+                        row[:mailing_address])
+           assert_equal(3, row[:numero_criancas])
+         end
+
+       end
+
+     end
+
+   end
+
+ end
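
Note: the headerless-reading pattern exercised above boils down to a few lines. The snippet below is an illustrative sketch, not code from the gem; the customers.csv path and its three column names are placeholders, and it uses only calls that appear in the tests above.

require 'jcsv'

# Read a headerless CSV, naming the columns ourselves and converting selected fields.
# "customers.csv" and the header names are placeholders for illustration.
reader = Jcsv.reader("customers.csv", headers: false,
                     custom_headers: ["id", "name", "birthDate"])

# Chain filters with >>: optional fields may be blank, otherwise they are converted.
reader.filters = {:id => Jcsv.int,
                  :birth_date => Jcsv.optional >> Jcsv.date("dd/MM/yyyy")}

reader.read do |line_no, row_no, row, headers|
  # Filtered fields arrive converted (Integer, DateTime, ...); the rest stay strings.
  puts "#{row_no}: #{row.inspect}"
end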
data/test/test_deep_map.rb
@@ -0,0 +1,264 @@
+ # -*- coding: utf-8 -*-
+
+ ##########################################################################################
+ # Copyright © 2015 Rodrigo Botafogo. All Rights Reserved. Permission to use, copy, modify,
+ # and distribute this software and its documentation for educational, research, and
+ # not-for-profit purposes, without fee and without a signed licensing agreement, is hereby
+ # granted, provided that the above copyright notice, this paragraph and the following two
+ # paragraphs appear in all copies, modifications, and distributions. Contact Rodrigo
+ # Botafogo - rodrigo.a.botafogo@gmail.com for commercial licensing opportunities.
+ #
+ # IN NO EVENT SHALL RODRIGO BOTAFOGO BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL,
+ # INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF
+ # THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF RODRIGO BOTAFOGO HAS BEEN ADVISED OF THE
+ # POSSIBILITY OF SUCH DAMAGE.
+ #
+ # RODRIGO BOTAFOGO SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ # THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE
+ # SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED HEREUNDER IS PROVIDED "AS IS".
+ # RODRIGO BOTAFOGO HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS,
+ # OR MODIFICATIONS.
+ ##########################################################################################
+
+ require 'rubygems'
+ require 'test/unit'
+ require 'shoulda'
+ require 'matrix'
+
+ require_relative '../config'
+
+ require 'jcsv'
+
+ class CSVTest < Test::Unit::TestCase
+
+   context "CSV test" do
+
+     setup do
+
+     end
+
+     #-------------------------------------------------------------------------------------
+     # When the CSV file is read in one big chunk with deep_map: true, each dimension
+     # becomes one level of nesting in the resulting map, keyed by that dimension's labels.
+     #-------------------------------------------------------------------------------------
+
+ should "parse multi-dimension csv file to map, chuk_size all and deep_map true" do
46
+
47
+ reader = Jcsv.reader("../data/epilepsy.csv", format: :map, chunk_size: :all,
48
+ dimensions: [:treatment, :subject, :period], deep_map: true)
49
+
50
+ # remove the :patient field from the data, as this field is already given by the
51
+ # :subject field.
52
+ reader.mapping = {:patient => false}
53
+
54
+ # since we are reading with chunk_size = :all, then we will only get one chunk back.
55
+ # Then we can get the first chunk by indexing read with 0: reader.read[0]
56
+ treatment = reader.read[0]
57
+ # p treatment
58
+
59
+ # get the dimensions
60
+ treatment_type = reader.dimensions[:treatment]
61
+ subject = reader.dimensions[:subject]
62
+ period = reader.dimensions[:period]
63
+
64
+ # variable labels has all dimension labels
65
+ assert_equal(0, treatment_type.labels["placebo"])
66
+ assert_equal(1, treatment_type.labels["Progabide"])
67
+ assert_equal(1, subject.labels["2"])
68
+ assert_equal(13, subject.labels["14"])
69
+ assert_equal(58, subject.labels["59"])
70
+ assert_equal(0, period.labels["1"])
71
+ assert_equal(3, period.labels["4"])
72
+
73
+ assert_equal("14", treatment["placebo"]["10"]["1"][:"seizure.rate"])
74
+
75
+ end
76
+
77
+ #-------------------------------------------------------------------------------------
78
+ #
79
+ #-------------------------------------------------------------------------------------
80
+
81
+ should "read data with dimensions, mapping and filters" do
82
+
83
+ reader = Jcsv.reader("../data/epilepsy.csv", format: :map, chunk_size: :all,
84
+ dimensions: [:treatment, :subject, :period], deep_map: true,
85
+ default_filter: Jcsv.int)
86
+
87
+ # remove the :patient field from the data, as this field is already given by the
88
+ # :subject field.
89
+ reader.mapping = {:patient => false}
90
+ reader.filters = {:"seizure.rate" => Jcsv.float}
91
+
92
+ # will raise an exception as :period is not a key. Will break as soon as we read the
93
+ # first period for the second user
94
+ treatment = reader.read[0]
95
+ # p treatment
96
+
97
+ assert_equal(14.0, treatment["placebo"]["10"]["1"][:"seizure.rate"])
98
+ assert_equal(19.0, treatment["Progabide"]["45"]["1"][:"seizure.rate"])
99
+
100
+ end
101
+
102
+ #-------------------------------------------------------------------------------------
103
+ #
104
+ #-------------------------------------------------------------------------------------
105
+
106
+ should "read data with deep_map but chunk_size not all" do
107
+
108
+ reader = Jcsv.reader("../data/epilepsy.csv", format: :map, chunk_size: 20,
109
+ dimensions: [:treatment, :subject, :period], deep_map: true,
110
+ default_filter: Jcsv.int)
111
+
112
+ # remove the :patient field from the data, as this field is already given by the
113
+ # :subject field.
114
+ reader.mapping = {:patient => false}
115
+ reader.filters = {:"seizure.rate" => Jcsv.float}
116
+
117
+ # will raise an exception as :period is not a key. Will break as soon as we read the
118
+ # first period for the second user
119
+ treatment = reader.read
120
+
121
+ assert_equal(3.0, treatment[0]["placebo"]["2"]["1"][:"seizure.rate"])
122
+ # since only 20 rows read per chunk, there is no Progabide row yet. Note that there
123
+ # was data in the test above
124
+ assert_equal(nil, treatment[0]["Progabide"])
125
+
126
+ # chunk 10, has Progabide as a dimension
127
+ assert_equal(6.0, treatment[10]["Progabide"]["51"]["2"][:"seizure.rate"])
128
+
129
+ end
130
+
131
+ #-------------------------------------------------------------------------------------
132
+ #
133
+ #-------------------------------------------------------------------------------------
134
+
135
+ should "raise exception if key is repeated" do
136
+
137
+ reader = Jcsv.reader("../data/epilepsy.csv", format: :map, chunk_size: :all,
138
+ dimensions: [:period], deep_map: true)
139
+
140
+ # will raise an exception as :period is not a key. Will break as soon as we read the
141
+ # first period for the second user
142
+ assert_raise ( Jcsv::DuplicateKeyError ) { reader.read[0] }
143
+
144
+ end
145
+
+     #-------------------------------------------------------------------------------------
+     # Reading with the dimensions declared out of order (or with missing dimension values)
+     # prints warning messages while the file is parsed.
+     #-------------------------------------------------------------------------------------
+
+     should "Show errors when dimensions are not in order or missing" do
+
+       reader = Jcsv.reader("../data/epilepsy.csv", format: :map, chunk_size: :all,
+                            dimensions: [:period, :treatment, :subject], deep_map: true)
+
+       p "LOTS OF ERROR MESSAGES EXPECTED FROM HERE..."
+
+       # Remove the :patient field from the data, as this field is already given by the
+       # :subject field.
+       reader.mapping = {:patient => false}
+
+       # Since we are reading with chunk_size: :all, only one chunk comes back, so the
+       # first (and only) chunk is reader.read[0].
+       treatment = reader.read[0]
+
+       p "... TO HERE. If no error messages, then something is wrong!"
+
+     end
+
+     #-------------------------------------------------------------------------------------
+     # Same as above, but with suppress_warnings: true the out-of-order dimension warnings
+     # are silenced.
+     #-------------------------------------------------------------------------------------
+
+     should "Suppress warnings when dimensions are not in order or missing" do
+
+       reader = Jcsv.reader("../data/epilepsy.csv", format: :map, chunk_size: :all,
+                            dimensions: [:period, :treatment, :subject], deep_map: true,
+                            suppress_warnings: true)
+
+       p "No warning messages should be seen from here..."
+
+       # Remove the :patient field from the data, as this field is already given by the
+       # :subject field.
+       reader.mapping = {:patient => false}
+
+       # Since we are reading with chunk_size: :all, read returns a single chunk.
+       treatment = reader.read
+       # p treatment
+
+       p "... to here. If there are any warning messages then there is something wrong!"
+
+     end
+
+     #-------------------------------------------------------------------------------------
+     # Parsing multidimensional CSV files differs considerably between chunked and
+     # unchunked reading. With no chunking, the result is identical to normal dimension
+     # reading: a flat map keyed by the dot-joined dimension values.
+     #-------------------------------------------------------------------------------------
+
+     should "parse multi-dimension csv file to map no chunk" do
+
+       reader = Jcsv.reader("../data/epilepsy.csv", format: :map,
+                            dimensions: [:treatment, :subject, :period], deep_map: true)
+
+       # Remove the :patient field from the data, as this field is already given by the
+       # :subject field.
+       reader.mapping = {:patient => false}
+
+       # With no chunking, read returns rows keyed by "treatment.subject.period".
+       treatment = reader.read
+       # p treatment
+
+       assert_equal("11", treatment["placebo.1.1"][:base])
+       assert_equal("31", treatment["placebo.1.1"][:age])
+       assert_equal("5", treatment["placebo.1.1"][:"seizure.rate"])
+
+       assert_equal("11", treatment["placebo.1.2"][:base])
+       assert_equal("31", treatment["placebo.1.2"][:age])
+       assert_equal("3", treatment["placebo.1.2"][:"seizure.rate"])
+
+     end
+
+     #-------------------------------------------------------------------------------------
+     # All examples until now used chunk_size: :all, but smaller chunks are possible. In
+     # this example, chunk_size is 20 and each chunk is processed by a block.
+     #-------------------------------------------------------------------------------------
+
+     should "read with dimension and given a block" do
+
+       reader = Jcsv.reader("../data/epilepsy.csv", format: :map, chunk_size: 20,
+                            dimensions: [:treatment, :subject, :period], deep_map: true,
+                            default_filter: Jcsv.int)
+
+       reader.mapping = {:patient => false}
+
+       reader.read do |line_no, row_no, chunk|
+         p line_no
+         p row_no
+         p chunk
+       end
+
+     end
+ =begin
+     #-------------------------------------------------------------------------------------
+     #
+     #-------------------------------------------------------------------------------------
+
+     should "read dimensions to lists" do
+
+       reader = Jcsv.reader("epilepsy.csv", chunk_size: :all, deep_map: true,
+                            dimensions: [:treatment, :subject, :period])
+
+       table = reader.read
+       # p table
+
+     end
+ =end
+
+   end
+
+ end
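
Note: the two result shapes asserted by this test file can be summarized side by side. This is a condensed, illustrative sketch rather than gem code; it assumes the same ../data/epilepsy.csv fixture and dimensions used above.

require 'jcsv'

# 1. With chunk_size: :all and deep_map: true, read returns one chunk, nested one
#    level per dimension and keyed by the dimension labels.
reader = Jcsv.reader("../data/epilepsy.csv", format: :map, chunk_size: :all,
                     dimensions: [:treatment, :subject, :period], deep_map: true)
chunk = reader.read[0]
p chunk["placebo"]["10"]["1"][:"seizure.rate"]   # nested lookup by dimension labels

# 2. With no chunk_size, read returns a flat map keyed by the dot-joined dimension values.
reader = Jcsv.reader("../data/epilepsy.csv", format: :map,
                     dimensions: [:treatment, :subject, :period], deep_map: true)
table = reader.read
p table["placebo.1.1"][:base]                    # "treatment.subject.period" key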
data/test/test_del.rb
@@ -0,0 +1,73 @@
+ # Scratch experiments: building a nested hash from dot-separated keys such as
+ # "placebo.john.1" (the same idea used for deep_map dimension keys).
+
+ hash = {}
+
+ key = "placebo.john.1"
+ # Create the intermediate hashes for each key segment.
+ key.split('.').reduce(hash) { |h, m| h[m] ||= {} }
+
+ # Split off the last segment and store the data under it.
+ *key, last = key.split(".")
+ key.inject(hash, :fetch)[last] = {a: 1, b: 2, c: 3}
+
+ key = "placebo.john.2"
+ key.split('.').reduce(hash) { |h, m| h[m] ||= {} }
+
+ *key, last = key.split(".")
+ key.inject(hash, :fetch)[last] = {a: 10, b: 20, c: 30}
+
+ puts hash #=> {"placebo"=>{"john"=>{"1"=>{:a=>1, :b=>2, :c=>3}, "2"=>{:a=>10, :b=>20, :c=>30}}}}
+ p hash["placebo"]["john"]["2"]
+
+ =begin
+
+ # Alternative using Hashie::Clash (kept commented out).
+ require 'hashie'
+
+ cl = Hashie::Clash.new
+
+ cl.placebo!.john!.p1(a: 1, b: 2, c: 3)
+ # cl.placebo!.john!.p2(a: 10, b: 20, c: 30)
+
+ p cl
+
+ =end
+
+ # Autovivifying hash: unknown keys create nested hashes on demand (not used below).
+ rh = Hash.new { |h, k| h[k] = Hash.new(&h.default_proc) }
+
+ h = Hash.new
+
+ =begin
+ # Manual construction of the same nesting, level by level.
+ h["placebo"] ||= Hash.new
+ h["med"] ||= Hash.new
+ h["placebo"]["john"] ||= Hash.new
+ h["placebo"]["john"][1] ||= Hash.new
+
+ h["placebo"] ||= Hash.new
+ h["placebo"]["john"] ||= Hash.new
+ h["placebo"]["john"][2] ||= Hash.new
+
+ h["placebo"]["john"][1] = {a: 1, b: 2, c: 3}
+ h["placebo"]["john"][2] = {a: 2, b: 10, c: 50}
+
+ p h["placebo"]
+ =end
+
+ =begin
+ # Same dotted-key approach as above, applied to h.
+ h["placebo"] ||= Hash.new
+ h["placebo"]["john"] ||= Hash.new
+ h["placebo"]["john"]["1"] ||= Hash.new
+
+ key = "placebo.john.1"
+
+ *key, last = key.split(".")
+ key.inject(h, :fetch)[last] = {a: 1, b: 2, c: 3}
+
+ h["placebo"] ||= Hash.new
+ h["placebo"]["john"] ||= Hash.new
+ h["placebo"]["john"]["2"] ||= Hash.new
+
+ key = "placebo.john.2"
+
+ *key, last = key.split(".")
+ key.inject(h, :fetch)[last] = {a: 10, b: 20, c: 30}
+
+ p h["placebo"]["john"]["2"]
+ =end
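
Note: the experiments above can be folded into one small helper. The sketch below is illustrative only; the deep_store name is invented here and is not part of mdarray-jcsv, but the body is the same split/reduce idiom used above.

# Store a value in a nested hash addressed by a dot-separated key,
# creating intermediate hashes as needed (illustrative helper, not gem code).
def deep_store(hash, dotted_key, value)
  *path, last = dotted_key.split(".")
  path.reduce(hash) { |h, k| h[k] ||= {} }[last] = value
  hash
end

h = {}
deep_store(h, "placebo.john.1", {a: 1, b: 2, c: 3})
deep_store(h, "placebo.john.2", {a: 10, b: 20, c: 30})
p h["placebo"]["john"]["2"]  #=> {:a=>10, :b=>20, :c=>30}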