mdarray-jcsv 0.6.3-java

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44) hide show
  1. checksums.yaml +7 -0
  2. data/LICENSE.txt +23 -0
  3. data/README.md +2 -0
  4. data/Rakefile +46 -0
  5. data/config.rb +104 -0
  6. data/lib/constraints.rb +205 -0
  7. data/lib/date_filters.rb +252 -0
  8. data/lib/dimensions.rb +276 -0
  9. data/lib/filters.rb +332 -0
  10. data/lib/jcsv.rb +107 -0
  11. data/lib/list_reader.rb +200 -0
  12. data/lib/locale.rb +192 -0
  13. data/lib/map_reader.rb +192 -0
  14. data/lib/mdarray-jcsv.rb +24 -0
  15. data/lib/mdarray_reader.rb +110 -0
  16. data/lib/numeric_filters.rb +225 -0
  17. data/lib/reader.rb +547 -0
  18. data/lib/supercsv_interface.rb +231 -0
  19. data/test/test_complete.rb +37 -0
  20. data/test/test_critbit.rb +442 -0
  21. data/test/test_customer_list.rb +436 -0
  22. data/test/test_customer_map.rb +209 -0
  23. data/test/test_customer_nhlist.rb +161 -0
  24. data/test/test_deep_map.rb +264 -0
  25. data/test/test_del.rb +73 -0
  26. data/test/test_dimensions.rb +231 -0
  27. data/test/test_example.rb +79 -0
  28. data/test/test_filters.rb +374 -0
  29. data/test/test_list_dimensions.rb +110 -0
  30. data/test/test_mdarray.rb +227 -0
  31. data/test/test_missing_data.rb +57 -0
  32. data/vendor/commons-beanutils-1.8.3.jar +0 -0
  33. data/vendor/commons-lang3-3.1.jar +0 -0
  34. data/vendor/dozer-5.4.0.jar +0 -0
  35. data/vendor/jcl-over-slf4j-1.6.6.jar +0 -0
  36. data/vendor/joda-time-2.7.jar +0 -0
  37. data/vendor/slf4j-api-1.7.5.jar +0 -0
  38. data/vendor/snakeyaml-1.14.jar +0 -0
  39. data/vendor/super-csv-2.4.0.jar +0 -0
  40. data/vendor/super-csv-dozer-2.4.0.jar +0 -0
  41. data/vendor/super-csv-java8-2.4.0.jar +0 -0
  42. data/vendor/super-csv-joda-2.4.0.jar +0 -0
  43. data/version.rb +2 -0
  44. metadata +196 -0
@@ -0,0 +1,231 @@
1
+ # -*- coding: utf-8 -*-
2
+
3
+ ##########################################################################################
4
+ # Copyright © 2015 Rodrigo Botafogo. All Rights Reserved. Permission to use, copy, modify,
5
+ # and distribute this software and its documentation for educational, research, and
6
+ # not-for-profit purposes, without fee and without a signed licensing agreement, is hereby
7
+ # granted, provided that the above copyright notice, this paragraph and the following two
8
+ # paragraphs appear in all copies, modifications, and distributions. Contact Rodrigo
9
+ # Botafogo - rodrigo.a.botafogo@gmail.com for commercial licensing opportunities.
10
+ #
11
+ # IN NO EVENT SHALL RODRIGO BOTAFOGO BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL,
12
+ # INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF
13
+ # THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF RODRIGO BOTAFOGO HAS BEEN ADVISED OF THE
14
+ # POSSIBILITY OF SUCH DAMAGE.
15
+ #
16
+ # RODRIGO BOTAFOGO SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
17
+ # THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE
18
+ # SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED HEREUNDER IS PROVIDED "AS IS".
19
+ # RODRIGO BOTAFOGO HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS,
20
+ # OR MODIFICATIONS.
21
+ ##########################################################################################
22
+
23
+ require 'test/unit'
24
+ require 'shoulda'
25
+
26
+ require_relative '../config'
27
+
28
+ require 'jcsv'
29
+
30
+ class CSVTest < Test::Unit::TestCase
31
+
32
+ context "CSV test" do
33
+
34
+ setup do
35
+
36
+ end
37
+
38
+ #-------------------------------------------------------------------------------------
39
+ # Read data into a flat map. Allows random access to the data by use of the map
40
+ # 'key'. The 'key' is a string that concatenates the values of the dimensions'
41
+ # labels with a '.'.
42
+ #-------------------------------------------------------------------------------------
43
+
44
+ should "read data into flat map" do
45
+
46
+ reader = Jcsv.reader("../data/epilepsy.csv", format: :map,
47
+ dimensions: [:treatment, :subject, :period],
48
+ default_filter: Jcsv.int)
49
+
50
+ # reader.filters = {:treatment => Jcsv.string}
51
+
52
+ # remove the :patient field from the data, as this field is already given by the
53
+ # :subject field.
54
+ reader.mapping = {:patient => false}
55
+
56
+ # read all the data into a flat map (hash) with keys the dimensions values
57
+ # concatenated with '.'.
58
+ treatment = reader.read
59
+ # p treatment
60
+
61
+ assert_equal(11, treatment["placebo.1.1"][:base])
62
+ assert_equal(31, treatment["placebo.1.1"][:age])
63
+ assert_equal(5, treatment["placebo.1.1"][:"seizure.rate"])
64
+
65
+ assert_equal(31, treatment["Progabide.35.2"][:base])
66
+ assert_equal(30, treatment["Progabide.35.2"][:age])
67
+ assert_equal(17, treatment["Progabide.35.2"][:"seizure.rate"])
68
+
69
+ end
70
+ #=begin
71
+ #-------------------------------------------------------------------------------------
72
+ # Read data into a flat map in chunks
73
+ #-------------------------------------------------------------------------------------
74
+
75
+ should "read data into flat map in chunks" do
76
+
77
+ # parameter deep_map: is not passed. By default it is false
78
+ reader = Jcsv.reader("../data/epilepsy.csv", format: :map, chunk_size: 20,
79
+ dimensions: [:treatment, :subject, :period],
80
+ default_filter: Jcsv.int)
81
+
82
+ # remove the :patient field from the data, as this field is already given by the
83
+ # :subject field.
84
+ reader.mapping = {:patient => false}
85
+ treatment = reader.read
86
+ # p treatment
87
+
88
+ treatment_type = reader.dimensions[:treatment]
89
+ subject = reader.dimensions[:subject]
90
+ period = reader.dimensions[:period]
91
+
92
+ # variable labels has all dimension labels
93
+ assert_equal(0, treatment_type.labels["placebo"])
94
+ assert_equal(1, treatment_type.labels["Progabide"])
95
+ assert_equal(1, subject.labels["2"])
96
+ assert_equal(13, subject.labels["14"])
97
+ assert_equal(58, subject.labels["59"])
98
+ assert_equal(0, period.labels["1"])
99
+ assert_equal(3, period.labels["4"])
100
+
101
+ # we now need to access the first chunk [0] to get to the desired element
102
+ assert_equal(11, treatment[0]["placebo.1.1"][:base])
103
+ assert_equal(31, treatment[0]["placebo.1.1"][:age])
104
+ assert_equal(5, treatment[0]["placebo.1.1"][:"seizure.rate"])
105
+
106
+ # chunk [0] does not have key "Progabide.35.2"
107
+ assert_equal(nil, treatment[0]["Progabide.35.2"])
108
+
109
+ assert_equal(10, treatment[6]["Progabide.32.3"][:base])
110
+ assert_equal(30, treatment[6]["Progabide.32.3"][:age])
111
+ assert_equal(1, treatment[6]["Progabide.32.3"][:"seizure.rate"])
112
+
113
+ end
114
+
115
+ #-------------------------------------------------------------------------------------
116
+ #
117
+ #-------------------------------------------------------------------------------------
118
+
119
+ should "read to map in enumerable chunks" do
120
+
121
+ # parameter deep_map: is not passed. By default it is false
122
+ reader = Jcsv.reader("../data/epilepsy.csv", format: :map, chunk_size: 20,
123
+ dimensions: [:treatment, :subject, :period],
124
+ default_filter: Jcsv.int)
125
+
126
+ # Method each without a block returns an enumerator
127
+ enum = reader.each
128
+
129
+ # read the first chunk. Chunk is of size 20
130
+ chunk = enum.next
131
+ data = chunk[2]
132
+
133
+ # in this case, only the first 20 rows were read, so only one treatment and six
134
+ # subjects were read until this point
135
+ assert_equal(1, reader.dimensions[:treatment].size)
136
+ # assert_equal(6, reader.dimensions[:subject].size)
137
+
138
+ assert_equal(8, data["placebo.4.4"][:base])
139
+ assert_equal(36, data["placebo.4.4"][:age])
140
+ assert_equal(4, data["placebo.4.4"][:"seizure.rate"])
141
+
142
+ # read the next chunk. Chunk is of size 20
143
+ chunk = enum.next
144
+
145
+ # read the next chunk... not interested in the second chunk for some reason...
146
+ chunk = enum.next
147
+ data = chunk[2]
148
+
149
+ # As we read new chunks of data, the dimensions labels accumulate, i.e., they are
150
+ # not erased between reads of every chunk (call to the next function). Dimensions
151
+ # are variables from the reader and not the chunk.
152
+ assert_equal(1, reader.dimensions[:treatment].size)
153
+ assert_equal(16, reader.dimensions[:subject].size)
154
+
155
+ assert_equal(33, data["placebo.12.2"][:base])
156
+ assert_equal(24, data["placebo.12.2"][:age])
157
+ assert_equal(6, data["placebo.12.2"][:"seizure.rate"])
158
+
159
+ end
160
+
161
+ #-------------------------------------------------------------------------------------
162
+ #
163
+ #-------------------------------------------------------------------------------------
164
+
165
+ should "read to map and pass to block with dimensions" do
166
+
167
+ # parameter deep_map: is not passed. By default it is false
168
+ reader = Jcsv.reader("../data/epilepsy.csv", format: :map,
169
+ dimensions: [:treatment, :subject, :period],
170
+ default_filter: Jcsv.int)
171
+
172
+ reader.read do |line_no, row_no, row|
173
+ assert_equal(1, row.keys.size)
174
+ end
175
+
176
+ end
177
+
178
+ #-------------------------------------------------------------------------------------
179
+ #
180
+ #-------------------------------------------------------------------------------------
181
+
182
+ should "read to map and pass to block with dimensions, chunk_size > 1" do
183
+
184
+ # parameter deep_map: is not passed. By default it is false
185
+ reader = Jcsv.reader("../data/epilepsy.csv", format: :map, chunk_size: 20,
186
+ dimensions: [:treatment, :subject, :period],
187
+ default_filter: Jcsv.int)
188
+
189
+ reader.read do |line_no, row_no, row|
190
+ assert_equal(20, row.keys.size) if line_no < 230
191
+ end
192
+
193
+ end
194
+
195
+ #-------------------------------------------------------------------------------------
196
+ #
197
+ #-------------------------------------------------------------------------------------
198
+
199
+ should "raise error if mapping a column to true" do
200
+
201
+ reader = Jcsv.reader("../data/epilepsy.csv", format: :map, chunk_size: :all,
202
+ dimensions: [:subject, :period],
203
+ default_filter: Jcsv.int)
204
+
205
+ # Raises an error, since mapping to true is not defined
206
+ assert_raise ( ArgumentError ) { reader.mapping =
207
+ {:treatment => false, :patient => true} }
208
+
209
+ end
210
+ #=end
211
+ #-------------------------------------------------------------------------------------
212
+ #
213
+ #-------------------------------------------------------------------------------------
214
+ =begin
215
+ should "raise exception when dimensions are out of order (slower moving to the left)" do
216
+
217
+ reader = Jcsv.reader("epilepsy.csv", format: :map, dimensions: [:period, :subject],
218
+ default_filter: Jcsv.int)
219
+
220
+ reader.mapping = {:treatment => false, :patient => false}
221
+
222
+ assert_raise ( RuntimeError ) { treatment = reader.read[0] }
223
+ # p treatment["1"]
224
+ # p treatment["2"]
225
+
226
+ end
227
+ =end
228
+
229
+ end
230
+
231
+ end
@@ -0,0 +1,79 @@
1
+ # -*- coding: utf-8 -*-
2
+
3
+ ##########################################################################################
4
+ # Copyright © 2015 Rodrigo Botafogo. All Rights Reserved. Permission to use, copy, modify,
5
+ # and distribute this software and its documentation for educational, research, and
6
+ # not-for-profit purposes, without fee and without a signed licensing agreement, is hereby
7
+ # granted, provided that the above copyright notice, this paragraph and the following two
8
+ # paragraphs appear in all copies, modifications, and distributions. Contact Rodrigo
9
+ # Botafogo - rodrigo.a.botafogo@gmail.com for commercial licensing opportunities.
10
+ #
11
+ # IN NO EVENT SHALL RODRIGO BOTAFOGO BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL,
12
+ # INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF
13
+ # THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF RODRIGO BOTAFOGO HAS BEEN ADVISED OF THE
14
+ # POSSIBILITY OF SUCH DAMAGE.
15
+ #
16
+ # RODRIGO BOTAFOGO SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
17
+ # THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE
18
+ # SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED HEREUNDER IS PROVIDED "AS IS".
19
+ # RODRIGO BOTAFOGO HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS,
20
+ # OR MODIFICATIONS.
21
+ ##########################################################################################
22
+
23
+ require 'rubygems'
24
+ require 'test/unit'
25
+ require 'shoulda'
26
+
27
+ require_relative '../config'
28
+
29
+ require 'jcsv'
30
+
31
+ class CSVTest < Test::Unit::TestCase
32
+
33
+ context "CSV test" do
34
+
35
+ setup do
36
+
37
+ end
38
+
39
+ #-------------------------------------------------------------------------------------
40
+ #
41
+ #-------------------------------------------------------------------------------------
42
+
43
+ should "parse the example csv file with filters" do
44
+
45
+ # Add filters, to filter the columns according to given rules. The "Year" column
46
+ # should be converted to an int and the "Price" column should be converted to a
47
+ # double.
48
+ parser = Jcsv.new("example.csv", headers: true, comment_starts: "#")
49
+ parser.filters = {"Year" => Jcsv.int,
50
+ "Price" => Jcsv.double}
51
+
52
+ parser.read do |line_no, row_no, row, headers|
53
+ p row
54
+ end
55
+
56
+ end
57
+
58
+ #-------------------------------------------------------------------------------------
59
+ #
60
+ #-------------------------------------------------------------------------------------
61
+
62
+ should "parse example to map" do
63
+
64
+ # type is :map. Rows are hashes. Set the default filter to not_nil. That is, all
65
+ # fields are required unless explicitly set to optional.
66
+ parser = Jcsv.new("example.csv", type: :map, default_filter: Jcsv.not_nil,
67
+ headers: true, comment_starts: "#")
68
+
69
+
70
+ parser.read do |line_no, row_no, row, headers|
71
+ p row
72
+ end
73
+
74
+
75
+ end
76
+
77
+ end
78
+
79
+ end
@@ -0,0 +1,374 @@
1
+ # -*- coding: utf-8 -*-
2
+
3
+ ##########################################################################################
4
+ # Copyright © 2015 Rodrigo Botafogo. All Rights Reserved. Permission to use, copy, modify,
5
+ # and distribute this software and its documentation for educational, research, and
6
+ # not-for-profit purposes, without fee and without a signed licensing agreement, is hereby
7
+ # granted, provided that the above copyright notice, this paragraph and the following two
8
+ # paragraphs appear in all copies, modifications, and distributions. Contact Rodrigo
9
+ # Botafogo - rodrigo.a.botafogo@gmail.com for commercial licensing opportunities.
10
+ #
11
+ # IN NO EVENT SHALL RODRIGO BOTAFOGO BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL,
12
+ # INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF
13
+ # THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF RODRIGO BOTAFOGO HAS BEEN ADVISED OF THE
14
+ # POSSIBILITY OF SUCH DAMAGE.
15
+ #
16
+ # RODRIGO BOTAFOGO SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
17
+ # THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE
18
+ # SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED HEREUNDER IS PROVIDED "AS IS".
19
+ # RODRIGO BOTAFOGO HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS,
20
+ # OR MODIFICATIONS.
21
+ ##########################################################################################
22
+
23
+ require 'rubygems'
24
+ require 'test/unit'
25
+ require 'shoulda'
26
+ require 'matrix'
27
+ require 'mdarray'
28
+
29
+ require_relative '../config'
30
+
31
+ require 'jcsv'
32
+
33
+ class CSVTest < Test::Unit::TestCase
34
+
35
+ context "CSV test" do
36
+
37
+ setup do
38
+
39
+ end
40
+
41
+ #-------------------------------------------------------------------------------------
42
+ #
43
+ #-------------------------------------------------------------------------------------
44
+ =begin
45
+ should "work with DecimalFormatSymbols" do
46
+
47
+ dfs = DecimalFormatSymbols.new
48
+ p dfs.currency_symbol
49
+ p dfs.decimal_separator.chr
50
+ p dfs.digit.chr
51
+ p dfs.exponent_separator
52
+ p dfs.grouping_separator.chr
53
+ p dfs.infinity
54
+ # Returns the ISO 4217 currency code of the currency of these DecimalFormatSymbols.
55
+ p dfs.international_currency_symbol
56
+ p dfs.minus_sign.chr
57
+ p dfs.monetary_decimal_separator.chr
58
+ p dfs.getNaN
59
+ # Gets the character used to separate positive and negative subpatterns in a pattern.
60
+ # p pattern_separator.chr
61
+ # Gets the character used for percent sign.
62
+ p dfs.percent.chr
63
+ # Gets the character used for per mille sign.
64
+ p dfs.per_mill
65
+ # Gets the character used for zero.
66
+ p dfs.zero_digit.chr
67
+
68
+ end
69
+
70
+ #-------------------------------------------------------------------------------------
71
+ #
72
+ #-------------------------------------------------------------------------------------
73
+
74
+ should "work with Locales" do
75
+
76
+ locale = Jcsv::Locale.default
77
+ puts "Your locale country is: #{locale.display_country}"
78
+
79
+ # Switch default locale to France, so display_country will be in French.
80
+ locale = Jcsv::Locale.default = Jcsv::Locale::FRANCE
81
+ assert_equal("français", locale.display_language)
82
+ assert_equal("France", locale.display_country)
83
+
84
+ # Create a new locale, but default is still France, so output is in French.
85
+ loc2 = Jcsv::Locale.new(language: "en", country: "US")
86
+ assert_equal("en-US", loc2.to_language_tag)
87
+ assert_equal("US", loc2.country)
88
+ assert_equal("Etats-Unis", loc2.display_country)
89
+
90
+ locale = Jcsv::Locale::US
91
+ p locale
92
+ end
93
+ =end
94
+ #-------------------------------------------------------------------------------------
95
+ #
96
+ #-------------------------------------------------------------------------------------
97
+
98
+ should "check all filters" do
99
+
100
+ reader = Jcsv.reader("../data/filters.csv", format: :map, col_sep: ";",
101
+ comment_starts: "#")
102
+
103
+ bool = Jcsv.bool(true_values: ["sim", "s", "verdadeiro", "v"],
104
+ false_values: ["nao", "n", "falso", "f"])
105
+
106
+ # supports int and long filters, but in Ruby it is better to use the fixnum
107
+ # filter
108
+ reader.filters = {
109
+ :int => Jcsv.int >> Jcsv.in_range(200, 300),
110
+ :double => Jcsv.float,
111
+ :double2 => Jcsv.float(Jcsv::Locale::US),
112
+ :long => Jcsv.long,
113
+ :complex => Jcsv.complex,
114
+ :rational => Jcsv.rational,
115
+ :big_num => Jcsv.bignum,
116
+ :big_decimal => Jcsv.big_decimal(Jcsv::Locale::US),
117
+ :big_decimal2 => Jcsv.big_decimal(Jcsv::Locale::BRAZIL),
118
+ :big_decimal3 => Jcsv.big_decimal(Jcsv::Locale::BRAZIL),
119
+ :truth1 => Jcsv.bool,
120
+ :truth2 => bool,
121
+ :truth3 => bool,
122
+ :name => Jcsv.in_range("P", "Q"),
123
+ :ip1 => Jcsv.ipaddr >> Jcsv.dynamic { |val| val.to_i } }
124
+
125
+ filters = reader.read[0]
126
+ p filters
127
+
128
+ end
129
+
130
+ #-------------------------------------------------------------------------------------
131
+ #
132
+ #-------------------------------------------------------------------------------------
133
+
134
+ should "parse dates" do
135
+
136
+ reader = Jcsv.reader("../data/dates.csv", format: :map, col_sep: ";",
137
+ comment_starts: "#")
138
+
139
+ reader.filters = {
140
+ :httpdate => Jcsv.httpdate(Date::JULIAN),
141
+ :iso8601_1 => Jcsv.iso8601(Date::ENGLAND),
142
+ :iso8601_2 => Jcsv.iso8601(Date::GREGORIAN),
143
+ :iso8601_3 => Jcsv.iso8601(Date::ITALY), # Date::ITALY is the default start date
144
+ :jd_1 => Jcsv.int >> Jcsv.jd,
145
+ :jisx0301 => Jcsv.jisx0301,
146
+ :date1 => Jcsv.date,
147
+ :date2 => Jcsv.date,
148
+ :date3 => Jcsv.date,
149
+ :rfc2822 => Jcsv.rfc2822,
150
+ :rfc3339 => Jcsv.rfc3339,
151
+ :rfc822 => Jcsv.rfc822,
152
+ :ptime1 => Jcsv.strptime('%Y-%m-%dT%H:%M:%S%z'),
153
+ :xmlschema => Jcsv.xmlschema }
154
+
155
+ filters = reader.read[0]
156
+
157
+ assert_equal(DateTime.httpdate('Sat, 03 Feb 2001 04:05:06 GMT', Date::JULIAN),
158
+ filters[:httpdate])
159
+ assert_equal(DateTime.iso8601('2001-02-03T04:05:06+07:00', Date::ENGLAND),
160
+ filters[:iso8601_1])
161
+ assert_equal(DateTime.iso8601('20010203T040506+0700', Date::GREGORIAN),
162
+ filters[:iso8601_2])
163
+ assert_equal(DateTime.iso8601('2001-W05-6T04:05:06+07:00'), filters[:iso8601_3])
164
+ assert_equal(DateTime.jd(2451944), filters[:jd_1])
165
+ assert_equal(DateTime.jisx0301('H13.02.03T04:05:06+07:00'), filters[:jisx0301])
166
+ assert_equal(DateTime.parse('2001-02-03T04:05:06+07:00'), filters[:date1])
167
+ assert_equal(DateTime.parse('20010203T040506+0700'), filters[:date2])
168
+ assert_equal(DateTime.parse('3rd Feb 2001 04:05:06 PM'), filters[:date3])
169
+ assert_equal(DateTime.rfc2822('Sat, 3 Feb 2001 04:05:06 +0700'), filters[:rfc2822])
170
+ assert_equal(DateTime.rfc3339('2001-02-03T04:05:06+07:00'), filters[:rfc3339])
171
+ assert_equal(DateTime.rfc822('Sat, 3 Feb 2001 04:05:06 +0700'), filters[:rfc822])
172
+ assert_equal(DateTime.strptime('2001-02-03T04:05:06+07:00', '%Y-%m-%dT%H:%M:%S%z'),
173
+ filters[:ptime1])
174
+ assert_equal(DateTime.xmlschema('2001-02-03T04:05:06+07:00'), filters[:xmlschema])
175
+
176
+ end
177
+
178
+ #-------------------------------------------------------------------------------------
179
+ #
180
+ #-------------------------------------------------------------------------------------
181
+
182
+ should "filter data onto a collector" do
183
+
184
+ # format is :map. Rows are hashes. No default filter is set here; collector filters
185
+ # are attached to some columns and accumulate the values that pass through them.
186
+ reader = Jcsv.reader("../data/customer.csv", format: :map, chunk_size: 2)
187
+
188
+ first_names = Jcsv.collector
189
+ last_names = Jcsv.collector
190
+ kids = Jcsv.collector
191
+
192
+ reader.filters = {
193
+ :first_name => first_names,
194
+ :last_name => last_names,
195
+ :number_of_kids => Jcsv.convert_nil_to(-1) >> Jcsv.fixnum >> kids
196
+ }
197
+
198
+ map = reader.read
199
+ assert_equal(["John", "Bob", "Alice", "Bill"], first_names.collection)
200
+ assert_equal(["Dunbar", "Down", "Wunderland", "Jobs"], last_names.collection)
201
+ assert_equal([-1, 0, 0, 3], kids.collection)
202
+
203
+ end
204
+
205
+ #-------------------------------------------------------------------------------------
206
+ #
207
+ #-------------------------------------------------------------------------------------
208
+
209
+ should "accept optional fields" do
210
+
211
+ reader = Jcsv.reader("../data/customer.csv", format: :map, chunk_size: 2,
212
+ default_filter: Jcsv.not_nil)
213
+ reader.filters = {
214
+ :number_of_kids => Jcsv.optional >> Jcsv.fixnum,
215
+ :married => Jcsv.optional
216
+ }
217
+ map = reader.read
218
+ # p map
219
+
220
+ end
221
+
222
+ #-------------------------------------------------------------------------------------
223
+ #
224
+ #-------------------------------------------------------------------------------------
225
+
226
+ should "work with dynamic filter" do
227
+
228
+ reader = Jcsv.reader("../data/BJsales.csv", format: :map)
229
+
230
+ rate = 3.75 # dollar to reais conversion rate
231
+
232
+ reader.filters = {
233
+ :b_jsales => Jcsv.optional >> Jcsv.float(Jcsv::Locale::US) >>
234
+ Jcsv.in_range(0, 300) >> Jcsv.dynamic { |value| value * rate }
235
+ }
236
+
237
+ map = reader.read
238
+ assert_equal(200.1 * rate, map[0][:b_jsales])
239
+ assert_equal(199.5 * rate, map[1][:b_jsales])
240
+ assert_equal(199.4 * rate, map[2][:b_jsales])
241
+ assert_equal(198.9 * rate, map[3][:b_jsales])
242
+
243
+ end
244
+
245
+ #-------------------------------------------------------------------------------------
246
+ #
247
+ #-------------------------------------------------------------------------------------
248
+
249
+ should "create new filters" do
250
+
251
+ reader = Jcsv.reader("../data/BJsales.csv", format: :map)
252
+
253
+ class RangeFilter < Jcsv::Filter
254
+
255
+ def initialize(start, final)
256
+ @start = start
257
+ @final = final
258
+ super()
259
+ end
260
+
261
+ def execute(value, context)
262
+ # check the constraint and raise a ConstraintViolation if the value is out of range
263
+ raise Jcsv::ConstraintViolation, "value not in range in #{context}" if
264
+ value < @start || value > @final
265
+ # Call next filter
266
+ exec_next(value, context)
267
+ end
268
+
269
+ end
270
+
271
+ rate = 3.75 # dollar to reais conversion rate
272
+
273
+ reader.filters = {
274
+ :b_jsales => Jcsv.optional >> Jcsv.float(Jcsv::Locale::US) >>
275
+ RangeFilter.new(0, 200) >> Jcsv.dynamic { |value| value * rate }
276
+ }
277
+
278
+ assert_raise ( Jcsv::ConstraintViolation ) { map = reader.read }
279
+
280
+ end
281
+
282
+ #-------------------------------------------------------------------------------------
283
+ #
284
+ #-------------------------------------------------------------------------------------
285
+
286
+ should "gsub strings" do
287
+
288
+ reader = Jcsv.reader("../data/customer.csv", format: :map)
289
+
290
+ reader.filters = {
291
+ :mailing_address => Jcsv.gsub(/[eao]/, 'e' => 3, 'o' => '*', 'a' => 'A'),
292
+ :first_name => Jcsv.gsub(/[aeiou]/, '*'),
293
+ :last_name => Jcsv.gsub(/([aeiou])/, '<\1>'),
294
+ :customer_no => Jcsv.gsub(/./) {|s| s.ord.to_s + ' '} >> Jcsv.fixnum
295
+ }
296
+
297
+ map = reader.read
298
+
299
+ assert_equal("J*hn", map[0][:first_name])
300
+ assert_equal("D<u>nb<a>r", map[0][:last_name])
301
+ assert_equal(49, map[0][:customer_no])
302
+ assert_equal("1600 Amphith3Atr3 PArkwAy\nM*untAin Vi3w, CA 94043\nUnit3d StAt3s",
303
+ map[0][:mailing_address])
304
+ assert_equal("Al*c*", map[2][:first_name])
305
+ assert_equal("D<o>wn", map[1][:last_name])
306
+
307
+ end
308
+
309
+ #-------------------------------------------------------------------------------------
310
+ #
311
+ #-------------------------------------------------------------------------------------
312
+
313
+ should "process with any string functions" do
314
+
315
+ reader = Jcsv.reader("../data/customer.csv", format: :map)
316
+
317
+ reader.filters = {
318
+ :mailing_address => Jcsv.str(:[], 0, 10),
319
+ :first_name => Jcsv.str(:delete, "aeiou"),
320
+ :last_name => Jcsv.str(:partition, "n"),
321
+ :favourite_quote => Jcsv.str(:reverse) >> Jcsv.str(:prepend, "rev: ") >>
322
+ Jcsv.str(:[], 0, 20),
323
+ :email => Jcsv.str(:gsub, /[eao]/, 'e' => 3, 'o' => '*', 'a' => 'A')
324
+ }
325
+
326
+ map = reader.read
327
+ assert_equal("Jhn", map[0][:first_name])
328
+ assert_equal(["Du", "n", "bar"], map[0][:last_name])
329
+ assert_equal("1600 Amphi", map[0][:mailing_address])
330
+ assert_equal("rev: sraW ratS - \".u", map[0][:favourite_quote])
331
+ assert_equal("jdunbAr@gmAil.c*m", map[0][:email])
332
+
333
+ end
334
+
335
+ #-------------------------------------------------------------------------------------
336
+ #
337
+ #-------------------------------------------------------------------------------------
338
+
339
+ should "raise errors on contraints" do
340
+
341
+ reader = Jcsv.reader("../data/city.csv", format: :map, col_sep: ";")
342
+
343
+ reader.filters = {
344
+ # :city => Jcsv.equals
345
+ :city => Jcsv.not_ascii?
346
+ # :city => Jcsv.end_with?("ka")
347
+ }
348
+
349
+ phones = reader.read
350
+ # p phones
351
+
352
+ end
353
+
354
+ #-------------------------------------------------------------------------------------
355
+ #
356
+ #-------------------------------------------------------------------------------------
357
+
358
+ should "raise error if substring found" do
359
+
360
+ reader = Jcsv.reader("../data/customer.csv", format: :map)
361
+
362
+ reader.filters = {
363
+ :first_name => Jcsv.forbid_substrings(["jh", "paw", "Jp"]),
364
+ :last_name => Jcsv.is_element_of(["Dunbar", "Down", "Wunderland", "Jobs"])
365
+ }
366
+
367
+ customers = reader.read
368
+ # p customers
369
+
370
+ end
371
+
372
+ end
373
+
374
+ end