mdarray-jcsv 0.6.3-java
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/LICENSE.txt +23 -0
- data/README.md +2 -0
- data/Rakefile +46 -0
- data/config.rb +104 -0
- data/lib/constraints.rb +205 -0
- data/lib/date_filters.rb +252 -0
- data/lib/dimensions.rb +276 -0
- data/lib/filters.rb +332 -0
- data/lib/jcsv.rb +107 -0
- data/lib/list_reader.rb +200 -0
- data/lib/locale.rb +192 -0
- data/lib/map_reader.rb +192 -0
- data/lib/mdarray-jcsv.rb +24 -0
- data/lib/mdarray_reader.rb +110 -0
- data/lib/numeric_filters.rb +225 -0
- data/lib/reader.rb +547 -0
- data/lib/supercsv_interface.rb +231 -0
- data/test/test_complete.rb +37 -0
- data/test/test_critbit.rb +442 -0
- data/test/test_customer_list.rb +436 -0
- data/test/test_customer_map.rb +209 -0
- data/test/test_customer_nhlist.rb +161 -0
- data/test/test_deep_map.rb +264 -0
- data/test/test_del.rb +73 -0
- data/test/test_dimensions.rb +231 -0
- data/test/test_example.rb +79 -0
- data/test/test_filters.rb +374 -0
- data/test/test_list_dimensions.rb +110 -0
- data/test/test_mdarray.rb +227 -0
- data/test/test_missing_data.rb +57 -0
- data/vendor/commons-beanutils-1.8.3.jar +0 -0
- data/vendor/commons-lang3-3.1.jar +0 -0
- data/vendor/dozer-5.4.0.jar +0 -0
- data/vendor/jcl-over-slf4j-1.6.6.jar +0 -0
- data/vendor/joda-time-2.7.jar +0 -0
- data/vendor/slf4j-api-1.7.5.jar +0 -0
- data/vendor/snakeyaml-1.14.jar +0 -0
- data/vendor/super-csv-2.4.0.jar +0 -0
- data/vendor/super-csv-dozer-2.4.0.jar +0 -0
- data/vendor/super-csv-java8-2.4.0.jar +0 -0
- data/vendor/super-csv-joda-2.4.0.jar +0 -0
- data/version.rb +2 -0
- metadata +196 -0
@@ -0,0 +1,231 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
|
3
|
+
##########################################################################################
|
4
|
+
# Copyright © 2015 Rodrigo Botafogo. All Rights Reserved. Permission to use, copy, modify,
|
5
|
+
# and distribute this software and its documentation for educational, research, and
|
6
|
+
# not-for-profit purposes, without fee and without a signed licensing agreement, is hereby
|
7
|
+
# granted, provided that the above copyright notice, this paragraph and the following two
|
8
|
+
# paragraphs appear in all copies, modifications, and distributions. Contact Rodrigo
|
9
|
+
# Botafogo - rodrigo.a.botafogo@gmail.com for commercial licensing opportunities.
|
10
|
+
#
|
11
|
+
# IN NO EVENT SHALL RODRIGO BOTAFOGO BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL,
|
12
|
+
# INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF
|
13
|
+
# THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF RODRIGO BOTAFOGO HAS BEEN ADVISED OF THE
|
14
|
+
# POSSIBILITY OF SUCH DAMAGE.
|
15
|
+
#
|
16
|
+
# RODRIGO BOTAFOGO SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
|
17
|
+
# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE
|
18
|
+
# SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED HEREUNDER IS PROVIDED "AS IS".
|
19
|
+
# RODRIGO BOTAFOGO HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS,
|
20
|
+
# OR MODIFICATIONS.
|
21
|
+
##########################################################################################
|
22
|
+
|
23
|
+
require 'test/unit'
|
24
|
+
require 'shoulda'
|
25
|
+
|
26
|
+
require_relative '../config'
|
27
|
+
|
28
|
+
require 'jcsv'
|
29
|
+
|
30
|
+
class CSVTest < Test::Unit::TestCase
|
31
|
+
|
32
|
+
context "CSV test" do
|
33
|
+
|
34
|
+
setup do
|
35
|
+
|
36
|
+
end
|
37
|
+
|
38
|
+
#-------------------------------------------------------------------------------------
|
39
|
+
# Read data into a flat map. Allows random access to the data by use of the map
|
40
|
+
# 'key'. The 'key' is a string that concatenates the values of the dimensions'
|
41
|
+
# labels with a '.'.
|
42
|
+
#-------------------------------------------------------------------------------------
|
43
|
+
|
44
|
+
should "read data into flat map" do
|
45
|
+
|
46
|
+
reader = Jcsv.reader("../data/epilepsy.csv", format: :map,
|
47
|
+
dimensions: [:treatment, :subject, :period],
|
48
|
+
default_filter: Jcsv.int)
|
49
|
+
|
50
|
+
# reader.filters = {:treatment => Jcsv.string}
|
51
|
+
|
52
|
+
# remove the :patient field from the data, as this field is already given by the
|
53
|
+
# :subject field.
|
54
|
+
reader.mapping = {:patient => false}
|
55
|
+
|
56
|
+
# read all the data into a flat map (hash) with keys the dimensions values
|
57
|
+
# concatenated with '.'.
|
58
|
+
treatment = reader.read
|
59
|
+
# p treatment
|
60
|
+
|
61
|
+
assert_equal(11, treatment["placebo.1.1"][:base])
|
62
|
+
assert_equal(31, treatment["placebo.1.1"][:age])
|
63
|
+
assert_equal(5, treatment["placebo.1.1"][:"seizure.rate"])
|
64
|
+
|
65
|
+
assert_equal(31, treatment["Progabide.35.2"][:base])
|
66
|
+
assert_equal(30, treatment["Progabide.35.2"][:age])
|
67
|
+
assert_equal(17, treatment["Progabide.35.2"][:"seizure.rate"])
|
68
|
+
|
69
|
+
end
|
70
|
+
#=begin
|
71
|
+
#-------------------------------------------------------------------------------------
|
72
|
+
# Read data into a flat map in chunks
|
73
|
+
#-------------------------------------------------------------------------------------
|
74
|
+
|
75
|
+
should "read data into flat map in chunks" do
|
76
|
+
|
77
|
+
# parameter deep_map: is not passed. By default it is false
|
78
|
+
reader = Jcsv.reader("../data/epilepsy.csv", format: :map, chunk_size: 20,
|
79
|
+
dimensions: [:treatment, :subject, :period],
|
80
|
+
default_filter: Jcsv.int)
|
81
|
+
|
82
|
+
# remove the :patient field from the data, as this field is already given by the
|
83
|
+
# :subject field.
|
84
|
+
reader.mapping = {:patient => false}
|
85
|
+
treatment = reader.read
|
86
|
+
# p treatment
|
87
|
+
|
88
|
+
treatment_type = reader.dimensions[:treatment]
|
89
|
+
subject = reader.dimensions[:subject]
|
90
|
+
period = reader.dimensions[:period]
|
91
|
+
|
92
|
+
# variable labels has all dimension labels
|
93
|
+
assert_equal(0, treatment_type.labels["placebo"])
|
94
|
+
assert_equal(1, treatment_type.labels["Progabide"])
|
95
|
+
assert_equal(1, subject.labels["2"])
|
96
|
+
assert_equal(13, subject.labels["14"])
|
97
|
+
assert_equal(58, subject.labels["59"])
|
98
|
+
assert_equal(0, period.labels["1"])
|
99
|
+
assert_equal(3, period.labels["4"])
|
100
|
+
|
101
|
+
# we now need to access the first chunk [0] to get to the desired element
|
102
|
+
assert_equal(11, treatment[0]["placebo.1.1"][:base])
|
103
|
+
assert_equal(31, treatment[0]["placebo.1.1"][:age])
|
104
|
+
assert_equal(5, treatment[0]["placebo.1.1"][:"seizure.rate"])
|
105
|
+
|
106
|
+
# chunk [0] does not have key "Progabide.35.2"
|
107
|
+
assert_equal(nil, treatment[0]["Progabide.35.2"])
|
108
|
+
|
109
|
+
assert_equal(10, treatment[6]["Progabide.32.3"][:base])
|
110
|
+
assert_equal(30, treatment[6]["Progabide.32.3"][:age])
|
111
|
+
assert_equal(1, treatment[6]["Progabide.32.3"][:"seizure.rate"])
|
112
|
+
|
113
|
+
end
|
114
|
+
|
115
|
+
#-------------------------------------------------------------------------------------
|
116
|
+
#
|
117
|
+
#-------------------------------------------------------------------------------------
|
118
|
+
|
119
|
+
should "read to map in enumerable chunks" do
|
120
|
+
|
121
|
+
# parameter deep_map: is not passed. By default it is false
|
122
|
+
reader = Jcsv.reader("../data/epilepsy.csv", format: :map, chunk_size: 20,
|
123
|
+
dimensions: [:treatment, :subject, :period],
|
124
|
+
default_filter: Jcsv.int)
|
125
|
+
|
126
|
+
# Method each without a block returns an enumerator
|
127
|
+
enum = reader.each
|
128
|
+
|
129
|
+
# read the first chunk. Chunk is of size 20
|
130
|
+
chunk = enum.next
|
131
|
+
data = chunk[2]
|
132
|
+
|
133
|
+
# in this case, only the first 20 rows were read, so only one treatment and six
|
134
|
+
# subjects were read until this point
|
135
|
+
assert_equal(1, reader.dimensions[:treatment].size)
|
136
|
+
# assert_equal(6, reader.dimensions[:subject].size)
|
137
|
+
|
138
|
+
assert_equal(8, data["placebo.4.4"][:base])
|
139
|
+
assert_equal(36, data["placebo.4.4"][:age])
|
140
|
+
assert_equal(4, data["placebo.4.4"][:"seizure.rate"])
|
141
|
+
|
142
|
+
# read the next chunk. Chunk is of size 20
|
143
|
+
chunk = enum.next
|
144
|
+
|
145
|
+
# read the next chunk... not interested in the second chunk for some reason...
|
146
|
+
chunk = enum.next
|
147
|
+
data = chunk[2]
|
148
|
+
|
149
|
+
# As we read new chunks of data, the dimensions labels accumulate, i.e., they are
|
150
|
+
# not erased between reads of every chunk (call to the next function). Dimensions
|
151
|
+
# are variables from the reader and not the chunk.
|
152
|
+
assert_equal(1, reader.dimensions[:treatment].size)
|
153
|
+
assert_equal(16, reader.dimensions[:subject].size)
|
154
|
+
|
155
|
+
assert_equal(33, data["placebo.12.2"][:base])
|
156
|
+
assert_equal(24, data["placebo.12.2"][:age])
|
157
|
+
assert_equal(6, data["placebo.12.2"][:"seizure.rate"])
|
158
|
+
|
159
|
+
end
|
160
|
+
|
161
|
+
#-------------------------------------------------------------------------------------
|
162
|
+
#
|
163
|
+
#-------------------------------------------------------------------------------------
|
164
|
+
|
165
|
+
should "read to map and pass to block with dimensions" do
|
166
|
+
|
167
|
+
# parameter deep_map: is not passed. By default it is false
|
168
|
+
reader = Jcsv.reader("../data/epilepsy.csv", format: :map,
|
169
|
+
dimensions: [:treatment, :subject, :period],
|
170
|
+
default_filter: Jcsv.int)
|
171
|
+
|
172
|
+
reader.read do |line_no, row_no, row|
|
173
|
+
assert_equal(1, row.keys.size)
|
174
|
+
end
|
175
|
+
|
176
|
+
end
|
177
|
+
|
178
|
+
#-------------------------------------------------------------------------------------
|
179
|
+
#
|
180
|
+
#-------------------------------------------------------------------------------------
|
181
|
+
|
182
|
+
should "read to map and pass to block with dimensions, chunk_size > 1" do
|
183
|
+
|
184
|
+
# parameter deep_map: is not passed. By default it is false
|
185
|
+
reader = Jcsv.reader("../data/epilepsy.csv", format: :map, chunk_size: 20,
|
186
|
+
dimensions: [:treatment, :subject, :period],
|
187
|
+
default_filter: Jcsv.int)
|
188
|
+
|
189
|
+
reader.read do |line_no, row_no, row|
|
190
|
+
assert_equal(20, row.keys.size) if line_no < 230
|
191
|
+
end
|
192
|
+
|
193
|
+
end
|
194
|
+
|
195
|
+
#-------------------------------------------------------------------------------------
|
196
|
+
#
|
197
|
+
#-------------------------------------------------------------------------------------
|
198
|
+
|
199
|
+
should "raise error if mapping a column to true" do
|
200
|
+
|
201
|
+
reader = Jcsv.reader("../data/epilepsy.csv", format: :map, chunk_size: :all,
|
202
|
+
dimensions: [:subject, :period],
|
203
|
+
default_filter: Jcsv.int)
|
204
|
+
|
205
|
+
# Raises an error, since mapping to true is not defined
|
206
|
+
assert_raise ( ArgumentError ) { reader.mapping =
|
207
|
+
{:treatment => false, :patient => true} }
|
208
|
+
|
209
|
+
end
|
210
|
+
#=end
|
211
|
+
#-------------------------------------------------------------------------------------
|
212
|
+
#
|
213
|
+
#-------------------------------------------------------------------------------------
|
214
|
+
=begin
|
215
|
+
should "raise exception when dimensions are out of order (slower moving to the left)" do
|
216
|
+
|
217
|
+
reader = Jcsv.reader("epilepsy.csv", format: :map, dimensions: [:period, :subject],
|
218
|
+
default_filter: Jcsv.int)
|
219
|
+
|
220
|
+
reader.mapping = {:treatment => false, :patient => false}
|
221
|
+
|
222
|
+
assert_raise ( RuntimeError ) { treatment = reader.read[0] }
|
223
|
+
# p treatment["1"]
|
224
|
+
# p treatment["2"]
|
225
|
+
|
226
|
+
end
|
227
|
+
=end
|
228
|
+
|
229
|
+
end
|
230
|
+
|
231
|
+
end
|
@@ -0,0 +1,79 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
|
3
|
+
##########################################################################################
|
4
|
+
# Copyright © 2015 Rodrigo Botafogo. All Rights Reserved. Permission to use, copy, modify,
|
5
|
+
# and distribute this software and its documentation for educational, research, and
|
6
|
+
# not-for-profit purposes, without fee and without a signed licensing agreement, is hereby
|
7
|
+
# granted, provided that the above copyright notice, this paragraph and the following two
|
8
|
+
# paragraphs appear in all copies, modifications, and distributions. Contact Rodrigo
|
9
|
+
# Botafogo - rodrigo.a.botafogo@gmail.com for commercial licensing opportunities.
|
10
|
+
#
|
11
|
+
# IN NO EVENT SHALL RODRIGO BOTAFOGO BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL,
|
12
|
+
# INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF
|
13
|
+
# THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF RODRIGO BOTAFOGO HAS BEEN ADVISED OF THE
|
14
|
+
# POSSIBILITY OF SUCH DAMAGE.
|
15
|
+
#
|
16
|
+
# RODRIGO BOTAFOGO SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
|
17
|
+
# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE
|
18
|
+
# SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED HEREUNDER IS PROVIDED "AS IS".
|
19
|
+
# RODRIGO BOTAFOGO HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS,
|
20
|
+
# OR MODIFICATIONS.
|
21
|
+
##########################################################################################
|
22
|
+
|
23
|
+
require 'rubygems'
|
24
|
+
require 'test/unit'
|
25
|
+
require 'shoulda'
|
26
|
+
|
27
|
+
require_relative '../config'
|
28
|
+
|
29
|
+
require 'jcsv'
|
30
|
+
|
31
|
+
class CSVTest < Test::Unit::TestCase
|
32
|
+
|
33
|
+
context "CSV test" do
|
34
|
+
|
35
|
+
setup do
|
36
|
+
|
37
|
+
end
|
38
|
+
|
39
|
+
#-------------------------------------------------------------------------------------
|
40
|
+
#
|
41
|
+
#-------------------------------------------------------------------------------------
|
42
|
+
|
43
|
+
should "parse the example csv file with filters" do
|
44
|
+
|
45
|
+
# Add filters, to filter the columns according to given rules. numberOfKids is
|
46
|
+
# optional and should be converted to an int. married is optional and should be
|
47
|
+
# converted to a boolean
|
48
|
+
parser = Jcsv.new("example.csv", headers: true, comment_starts: "#")
|
49
|
+
parser.filters = {"Year" => Jcsv.int,
|
50
|
+
"Price" => Jcsv.double}
|
51
|
+
|
52
|
+
parser.read do |line_no, row_no, row, headers|
|
53
|
+
p row
|
54
|
+
end
|
55
|
+
|
56
|
+
end
|
57
|
+
|
58
|
+
#-------------------------------------------------------------------------------------
|
59
|
+
#
|
60
|
+
#-------------------------------------------------------------------------------------
|
61
|
+
|
62
|
+
should "parse example to map" do
|
63
|
+
|
64
|
+
# type is :map. Rows are hashes. Set the default filter to not_nil. That is, all
|
65
|
+
# fields are required unless explicitly set to optional.
|
66
|
+
parser = Jcsv.new("example.csv", type: :map, default_filter: Jcsv.not_nil,
|
67
|
+
headers: true, comment_starts: "#")
|
68
|
+
|
69
|
+
|
70
|
+
parser.read do |line_no, row_no, row, headers|
|
71
|
+
p row
|
72
|
+
end
|
73
|
+
|
74
|
+
|
75
|
+
end
|
76
|
+
|
77
|
+
end
|
78
|
+
|
79
|
+
end
|
@@ -0,0 +1,374 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
|
3
|
+
##########################################################################################
|
4
|
+
# Copyright © 2015 Rodrigo Botafogo. All Rights Reserved. Permission to use, copy, modify,
|
5
|
+
# and distribute this software and its documentation for educational, research, and
|
6
|
+
# not-for-profit purposes, without fee and without a signed licensing agreement, is hereby
|
7
|
+
# granted, provided that the above copyright notice, this paragraph and the following two
|
8
|
+
# paragraphs appear in all copies, modifications, and distributions. Contact Rodrigo
|
9
|
+
# Botafogo - rodrigo.a.botafogo@gmail.com for commercial licensing opportunities.
|
10
|
+
#
|
11
|
+
# IN NO EVENT SHALL RODRIGO BOTAFOGO BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL,
|
12
|
+
# INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF
|
13
|
+
# THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF RODRIGO BOTAFOGO HAS BEEN ADVISED OF THE
|
14
|
+
# POSSIBILITY OF SUCH DAMAGE.
|
15
|
+
#
|
16
|
+
# RODRIGO BOTAFOGO SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
|
17
|
+
# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE
|
18
|
+
# SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED HEREUNDER IS PROVIDED "AS IS".
|
19
|
+
# RODRIGO BOTAFOGO HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS,
|
20
|
+
# OR MODIFICATIONS.
|
21
|
+
##########################################################################################
|
22
|
+
|
23
|
+
require 'rubygems'
|
24
|
+
require 'test/unit'
|
25
|
+
require 'shoulda'
|
26
|
+
require 'matrix'
|
27
|
+
require 'mdarray'
|
28
|
+
|
29
|
+
require_relative '../config'
|
30
|
+
|
31
|
+
require 'jcsv'
|
32
|
+
|
33
|
+
class CSVTest < Test::Unit::TestCase
|
34
|
+
|
35
|
+
context "CSV test" do
|
36
|
+
|
37
|
+
setup do
|
38
|
+
|
39
|
+
end
|
40
|
+
|
41
|
+
#-------------------------------------------------------------------------------------
|
42
|
+
#
|
43
|
+
#-------------------------------------------------------------------------------------
|
44
|
+
=begin
|
45
|
+
should "work with DecimalFormatSymbols" do
|
46
|
+
|
47
|
+
dfs = DecimalFormatSymbols.new
|
48
|
+
p dfs.currency_symbol
|
49
|
+
p dfs.decimal_separator.chr
|
50
|
+
p dfs.digit.chr
|
51
|
+
p dfs.exponent_separator
|
52
|
+
p dfs.grouping_separator.chr
|
53
|
+
p dfs.infinity
|
54
|
+
# Returns the ISO 4217 currency code of the currency of these DecimalFormatSymbols.
|
55
|
+
p dfs.international_currency_symbol
|
56
|
+
p dfs.minus_sign.chr
|
57
|
+
p dfs.monetary_decimal_separator.chr
|
58
|
+
p dfs.getNaN
|
59
|
+
# Gets the character used to separate positive and negative subpatterns in a pattern.
|
60
|
+
# p pattern_separator.chr
|
61
|
+
# Gets the character used for percent sign.
|
62
|
+
p dfs.percent.chr
|
63
|
+
# Gets the character used for per mille sign.
|
64
|
+
p dfs.per_mill
|
65
|
+
# Gets the character used for zero.
|
66
|
+
p dfs.zero_digit.chr
|
67
|
+
|
68
|
+
end
|
69
|
+
|
70
|
+
#-------------------------------------------------------------------------------------
|
71
|
+
#
|
72
|
+
#-------------------------------------------------------------------------------------
|
73
|
+
|
74
|
+
should "work with Locales" do
|
75
|
+
|
76
|
+
locale = Jcsv::Locale.default
|
77
|
+
puts "Your locale country is: #{locale.display_country}"
|
78
|
+
|
79
|
+
# Switch default locale to France, so display_country will be in French.
|
80
|
+
locale = Jcsv::Locale.default = Jcsv::Locale::FRANCE
|
81
|
+
assert_equal("français", locale.display_language)
|
82
|
+
assert_equal("France", locale.display_country)
|
83
|
+
|
84
|
+
# Create a new locale, but default is still France, so output is in French.
|
85
|
+
loc2 = Jcsv::Locale.new(language: "en", country: "US")
|
86
|
+
assert_equal("en-US", loc2.to_language_tag)
|
87
|
+
assert_equal("US", loc2.country)
|
88
|
+
assert_equal("Etats-Unis", loc2.display_country)
|
89
|
+
|
90
|
+
locale = Jcsv::Locale::US
|
91
|
+
p locale
|
92
|
+
end
|
93
|
+
=end
|
94
|
+
#-------------------------------------------------------------------------------------
|
95
|
+
#
|
96
|
+
#-------------------------------------------------------------------------------------
|
97
|
+
|
98
|
+
should "check all filters" do
|
99
|
+
|
100
|
+
reader = Jcsv.reader("../data/filters.csv", format: :map, col_sep: ";",
|
101
|
+
comment_starts: "#")
|
102
|
+
|
103
|
+
bool = Jcsv.bool(true_values: ["sim", "s", "verdadeiro", "v"],
|
104
|
+
false_values: ["nao", "n", "falso", "f"])
|
105
|
+
|
106
|
+
# supports int and long filters, but in Ruby it is better to use the fixnum
|
107
|
+
# filter
|
108
|
+
reader.filters = {
|
109
|
+
:int => Jcsv.int >> Jcsv.in_range(200, 300),
|
110
|
+
:double => Jcsv.float,
|
111
|
+
:double2 => Jcsv.float(Jcsv::Locale::US),
|
112
|
+
:long => Jcsv.long,
|
113
|
+
:complex => Jcsv.complex,
|
114
|
+
:rational => Jcsv.rational,
|
115
|
+
:big_num => Jcsv.bignum,
|
116
|
+
:big_decimal => Jcsv.big_decimal(Jcsv::Locale::US),
|
117
|
+
:big_decimal2 => Jcsv.big_decimal(Jcsv::Locale::BRAZIL),
|
118
|
+
:big_decimal3 => Jcsv.big_decimal(Jcsv::Locale::BRAZIL),
|
119
|
+
:truth1 => Jcsv.bool,
|
120
|
+
:truth2 => bool,
|
121
|
+
:truth3 => bool,
|
122
|
+
:name => Jcsv.in_range("P", "Q"),
|
123
|
+
:ip1 => Jcsv.ipaddr >> Jcsv.dynamic { |val| val.to_i } }
|
124
|
+
|
125
|
+
filters = reader.read[0]
|
126
|
+
p filters
|
127
|
+
|
128
|
+
end
|
129
|
+
|
130
|
+
#-------------------------------------------------------------------------------------
|
131
|
+
#
|
132
|
+
#-------------------------------------------------------------------------------------
|
133
|
+
|
134
|
+
should "parse dates" do
|
135
|
+
|
136
|
+
reader = Jcsv.reader("../data/dates.csv", format: :map, col_sep: ";",
|
137
|
+
comment_starts: "#")
|
138
|
+
|
139
|
+
reader.filters = {
|
140
|
+
:httpdate => Jcsv.httpdate(Date::JULIAN),
|
141
|
+
:iso8601_1 => Jcsv.iso8601(Date::ENGLAND),
|
142
|
+
:iso8601_2 => Jcsv.iso8601(Date::GREGORIAN),
|
143
|
+
:iso8601_3 => Jcsv.iso8601(Date::ITALY), # Date::ITALY is the default start date
|
144
|
+
:jd_1 => Jcsv.int >> Jcsv.jd,
|
145
|
+
:jisx0301 => Jcsv.jisx0301,
|
146
|
+
:date1 => Jcsv.date,
|
147
|
+
:date2 => Jcsv.date,
|
148
|
+
:date3 => Jcsv.date,
|
149
|
+
:rfc2822 => Jcsv.rfc2822,
|
150
|
+
:rfc3339 => Jcsv.rfc3339,
|
151
|
+
:rfc822 => Jcsv.rfc822,
|
152
|
+
:ptime1 => Jcsv.strptime('%Y-%m-%dT%H:%M:%S%z'),
|
153
|
+
:xmlschema => Jcsv.xmlschema }
|
154
|
+
|
155
|
+
filters = reader.read[0]
|
156
|
+
|
157
|
+
assert_equal(DateTime.httpdate('Sat, 03 Feb 2001 04:05:06 GMT', Date::JULIAN),
|
158
|
+
filters[:httpdate])
|
159
|
+
assert_equal(DateTime.iso8601('2001-02-03T04:05:06+07:00', Date::ENGLAND),
|
160
|
+
filters[:iso8601_1])
|
161
|
+
assert_equal(DateTime.iso8601('20010203T040506+0700', Date::GREGORIAN),
|
162
|
+
filters[:iso8601_2])
|
163
|
+
assert_equal(DateTime.iso8601('2001-W05-6T04:05:06+07:00'), filters[:iso8601_3])
|
164
|
+
assert_equal(DateTime.jd(2451944), filters[:jd_1])
|
165
|
+
assert_equal(DateTime.jisx0301('H13.02.03T04:05:06+07:00'), filters[:jisx0301])
|
166
|
+
assert_equal(DateTime.parse('2001-02-03T04:05:06+07:00'), filters[:date1])
|
167
|
+
assert_equal(DateTime.parse('20010203T040506+0700'), filters[:date2])
|
168
|
+
assert_equal(DateTime.parse('3rd Feb 2001 04:05:06 PM'), filters[:date3])
|
169
|
+
assert_equal(DateTime.rfc2822('Sat, 3 Feb 2001 04:05:06 +0700'), filters[:rfc2822])
|
170
|
+
assert_equal(DateTime.rfc3339('2001-02-03T04:05:06+07:00'), filters[:rfc3339])
|
171
|
+
assert_equal(DateTime.rfc822('Sat, 3 Feb 2001 04:05:06 +0700'), filters[:rfc822])
|
172
|
+
assert_equal(DateTime.strptime('2001-02-03T04:05:06+07:00', '%Y-%m-%dT%H:%M:%S%z'),
|
173
|
+
filters[:ptime1])
|
174
|
+
assert_equal(DateTime.xmlschema('2001-02-03T04:05:06+07:00'), filters[:xmlschema])
|
175
|
+
|
176
|
+
end
|
177
|
+
|
178
|
+
#-------------------------------------------------------------------------------------
|
179
|
+
#
|
180
|
+
#-------------------------------------------------------------------------------------
|
181
|
+
|
182
|
+
should "filter data onto a collector" do
|
183
|
+
|
184
|
+
# type is :map. Rows are hashes. Set the default filter to not_nil. That is, all
|
185
|
+
# fields are required unless explicitly set to optional.
|
186
|
+
reader = Jcsv.reader("../data/customer.csv", format: :map, chunk_size: 2)
|
187
|
+
|
188
|
+
first_names = Jcsv.collector
|
189
|
+
last_names = Jcsv.collector
|
190
|
+
kids = Jcsv.collector
|
191
|
+
|
192
|
+
reader.filters = {
|
193
|
+
:first_name => first_names,
|
194
|
+
:last_name => last_names,
|
195
|
+
:number_of_kids => Jcsv.convert_nil_to(-1) >> Jcsv.fixnum >> kids
|
196
|
+
}
|
197
|
+
|
198
|
+
map = reader.read
|
199
|
+
assert_equal(["John", "Bob", "Alice", "Bill"], first_names.collection)
|
200
|
+
assert_equal(["Dunbar", "Down", "Wunderland", "Jobs"], last_names.collection)
|
201
|
+
assert_equal([-1, 0, 0, 3], kids.collection)
|
202
|
+
|
203
|
+
end
|
204
|
+
|
205
|
+
#-------------------------------------------------------------------------------------
|
206
|
+
#
|
207
|
+
#-------------------------------------------------------------------------------------
|
208
|
+
|
209
|
+
should "accept optional fields" do
|
210
|
+
|
211
|
+
reader = Jcsv.reader("../data/customer.csv", format: :map, chunk_size: 2,
|
212
|
+
default_filter: Jcsv.not_nil)
|
213
|
+
reader.filters = {
|
214
|
+
:number_of_kids => Jcsv.optional >> Jcsv.fixnum,
|
215
|
+
:married => Jcsv.optional
|
216
|
+
}
|
217
|
+
map = reader.read
|
218
|
+
# p map
|
219
|
+
|
220
|
+
end
|
221
|
+
|
222
|
+
#-------------------------------------------------------------------------------------
|
223
|
+
#
|
224
|
+
#-------------------------------------------------------------------------------------
|
225
|
+
|
226
|
+
should "work with dynamic filter" do
|
227
|
+
|
228
|
+
reader = Jcsv.reader("../data/BJsales.csv", format: :map)
|
229
|
+
|
230
|
+
rate = 3.75 # dollar to reais conversion rate
|
231
|
+
|
232
|
+
reader.filters = {
|
233
|
+
:b_jsales => Jcsv.optional >> Jcsv.float(Jcsv::Locale::US) >>
|
234
|
+
Jcsv.in_range(0, 300) >> Jcsv.dynamic { |value| value * rate }
|
235
|
+
}
|
236
|
+
|
237
|
+
map = reader.read
|
238
|
+
assert_equal(200.1 * rate, map[0][:b_jsales])
|
239
|
+
assert_equal(199.5 * rate, map[1][:b_jsales])
|
240
|
+
assert_equal(199.4 * rate, map[2][:b_jsales])
|
241
|
+
assert_equal(198.9 * rate, map[3][:b_jsales])
|
242
|
+
|
243
|
+
end
|
244
|
+
|
245
|
+
#-------------------------------------------------------------------------------------
|
246
|
+
#
|
247
|
+
#-------------------------------------------------------------------------------------
|
248
|
+
|
249
|
+
should "create new filters" do
|
250
|
+
|
251
|
+
reader = Jcsv.reader("../data/BJsales.csv", format: :map)
|
252
|
+
|
253
|
+
class RangeFilter < Jcsv::Filter
|
254
|
+
|
255
|
+
def initialize(start, final)
|
256
|
+
@start = start
|
257
|
+
@final = final
|
258
|
+
super()
|
259
|
+
end
|
260
|
+
|
261
|
+
def execute(value, context)
|
262
|
+
# check the constraint and raise ConstraintViolation if the value is out of range
|
263
|
+
raise Jcsv::ConstraintViolation, "value not in range in #{context}" if
|
264
|
+
value < @start || value > @final
|
265
|
+
# Call next filter
|
266
|
+
exec_next(value, context)
|
267
|
+
end
|
268
|
+
|
269
|
+
end
|
270
|
+
|
271
|
+
rate = 3.75 # dollar to reais conversion rate
|
272
|
+
|
273
|
+
reader.filters = {
|
274
|
+
:b_jsales => Jcsv.optional >> Jcsv.float(Jcsv::Locale::US) >>
|
275
|
+
RangeFilter.new(0, 200) >> Jcsv.dynamic { |value| value * rate }
|
276
|
+
}
|
277
|
+
|
278
|
+
assert_raise ( Jcsv::ConstraintViolation ) { map = reader.read }
|
279
|
+
|
280
|
+
end
|
281
|
+
|
282
|
+
#-------------------------------------------------------------------------------------
|
283
|
+
#
|
284
|
+
#-------------------------------------------------------------------------------------
|
285
|
+
|
286
|
+
should "gsub strings" do
|
287
|
+
|
288
|
+
reader = Jcsv.reader("../data/customer.csv", format: :map)
|
289
|
+
|
290
|
+
reader.filters = {
|
291
|
+
:mailing_address => Jcsv.gsub(/[eao]/, 'e' => 3, 'o' => '*', 'a' => 'A'),
|
292
|
+
:first_name => Jcsv.gsub(/[aeiou]/, '*'),
|
293
|
+
:last_name => Jcsv.gsub(/([aeiou])/, '<\1>'),
|
294
|
+
:customer_no => Jcsv.gsub(/./) {|s| s.ord.to_s + ' '} >> Jcsv.fixnum
|
295
|
+
}
|
296
|
+
|
297
|
+
map = reader.read
|
298
|
+
|
299
|
+
assert_equal("J*hn", map[0][:first_name])
|
300
|
+
assert_equal("D<u>nb<a>r", map[0][:last_name])
|
301
|
+
assert_equal(49, map[0][:customer_no])
|
302
|
+
assert_equal("1600 Amphith3Atr3 PArkwAy\nM*untAin Vi3w, CA 94043\nUnit3d StAt3s",
|
303
|
+
map[0][:mailing_address])
|
304
|
+
assert_equal("Al*c*", map[2][:first_name])
|
305
|
+
assert_equal("D<o>wn", map[1][:last_name])
|
306
|
+
|
307
|
+
end
|
308
|
+
|
309
|
+
#-------------------------------------------------------------------------------------
|
310
|
+
#
|
311
|
+
#-------------------------------------------------------------------------------------
|
312
|
+
|
313
|
+
should "process with any string functions" do
|
314
|
+
|
315
|
+
reader = Jcsv.reader("../data/customer.csv", format: :map)
|
316
|
+
|
317
|
+
reader.filters = {
|
318
|
+
:mailing_address => Jcsv.str(:[], 0, 10),
|
319
|
+
:first_name => Jcsv.str(:delete, "aeiou"),
|
320
|
+
:last_name => Jcsv.str(:partition, "n"),
|
321
|
+
:favourite_quote => Jcsv.str(:reverse) >> Jcsv.str(:prepend, "rev: ") >>
|
322
|
+
Jcsv.str(:[], 0, 20),
|
323
|
+
:email => Jcsv.str(:gsub, /[eao]/, 'e' => 3, 'o' => '*', 'a' => 'A')
|
324
|
+
}
|
325
|
+
|
326
|
+
map = reader.read
|
327
|
+
assert_equal("Jhn", map[0][:first_name])
|
328
|
+
assert_equal(["Du", "n", "bar"], map[0][:last_name])
|
329
|
+
assert_equal("1600 Amphi", map[0][:mailing_address])
|
330
|
+
assert_equal("rev: sraW ratS - \".u", map[0][:favourite_quote])
|
331
|
+
assert_equal("jdunbAr@gmAil.c*m", map[0][:email])
|
332
|
+
|
333
|
+
end
|
334
|
+
|
335
|
+
#-------------------------------------------------------------------------------------
|
336
|
+
#
|
337
|
+
#-------------------------------------------------------------------------------------
|
338
|
+
|
339
|
+
should "raise errors on contraints" do
|
340
|
+
|
341
|
+
reader = Jcsv.reader("../data/city.csv", format: :map, col_sep: ";")
|
342
|
+
|
343
|
+
reader.filters = {
|
344
|
+
# :city => Jcsv.equals
|
345
|
+
:city => Jcsv.not_ascii?
|
346
|
+
# :city => Jcsv.end_with?("ka")
|
347
|
+
}
|
348
|
+
|
349
|
+
phones = reader.read
|
350
|
+
# p phones
|
351
|
+
|
352
|
+
end
|
353
|
+
|
354
|
+
#-------------------------------------------------------------------------------------
|
355
|
+
#
|
356
|
+
#-------------------------------------------------------------------------------------
|
357
|
+
|
358
|
+
should "raise error if substring found" do
|
359
|
+
|
360
|
+
reader = Jcsv.reader("../data/customer.csv", format: :map)
|
361
|
+
|
362
|
+
reader.filters = {
|
363
|
+
:first_name => Jcsv.forbid_substrings(["jh", "paw", "Jp"]),
|
364
|
+
:last_name => Jcsv.is_element_of(["Dunbar", "Down", "Wunderland", "Jobs"])
|
365
|
+
}
|
366
|
+
|
367
|
+
customers = reader.read
|
368
|
+
# p customers
|
369
|
+
|
370
|
+
end
|
371
|
+
|
372
|
+
end
|
373
|
+
|
374
|
+
end
|