mdarray-jcsv 0.6.3-java
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/LICENSE.txt +23 -0
- data/README.md +2 -0
- data/Rakefile +46 -0
- data/config.rb +104 -0
- data/lib/constraints.rb +205 -0
- data/lib/date_filters.rb +252 -0
- data/lib/dimensions.rb +276 -0
- data/lib/filters.rb +332 -0
- data/lib/jcsv.rb +107 -0
- data/lib/list_reader.rb +200 -0
- data/lib/locale.rb +192 -0
- data/lib/map_reader.rb +192 -0
- data/lib/mdarray-jcsv.rb +24 -0
- data/lib/mdarray_reader.rb +110 -0
- data/lib/numeric_filters.rb +225 -0
- data/lib/reader.rb +547 -0
- data/lib/supercsv_interface.rb +231 -0
- data/test/test_complete.rb +37 -0
- data/test/test_critbit.rb +442 -0
- data/test/test_customer_list.rb +436 -0
- data/test/test_customer_map.rb +209 -0
- data/test/test_customer_nhlist.rb +161 -0
- data/test/test_deep_map.rb +264 -0
- data/test/test_del.rb +73 -0
- data/test/test_dimensions.rb +231 -0
- data/test/test_example.rb +79 -0
- data/test/test_filters.rb +374 -0
- data/test/test_list_dimensions.rb +110 -0
- data/test/test_mdarray.rb +227 -0
- data/test/test_missing_data.rb +57 -0
- data/vendor/commons-beanutils-1.8.3.jar +0 -0
- data/vendor/commons-lang3-3.1.jar +0 -0
- data/vendor/dozer-5.4.0.jar +0 -0
- data/vendor/jcl-over-slf4j-1.6.6.jar +0 -0
- data/vendor/joda-time-2.7.jar +0 -0
- data/vendor/slf4j-api-1.7.5.jar +0 -0
- data/vendor/snakeyaml-1.14.jar +0 -0
- data/vendor/super-csv-2.4.0.jar +0 -0
- data/vendor/super-csv-dozer-2.4.0.jar +0 -0
- data/vendor/super-csv-java8-2.4.0.jar +0 -0
- data/vendor/super-csv-joda-2.4.0.jar +0 -0
- data/version.rb +2 -0
- metadata +196 -0
@@ -0,0 +1,231 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
|
3
|
+
##########################################################################################
|
4
|
+
# Copyright © 2015 Rodrigo Botafogo. All Rights Reserved. Permission to use, copy, modify,
|
5
|
+
# and distribute this software and its documentation for educational, research, and
|
6
|
+
# not-for-profit purposes, without fee and without a signed licensing agreement, is hereby
|
7
|
+
# granted, provided that the above copyright notice, this paragraph and the following two
|
8
|
+
# paragraphs appear in all copies, modifications, and distributions. Contact Rodrigo
|
9
|
+
# Botafogo - rodrigo.a.botafogo@gmail.com for commercial licensing opportunities.
|
10
|
+
#
|
11
|
+
# IN NO EVENT SHALL RODRIGO BOTAFOGO BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL,
|
12
|
+
# INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF
|
13
|
+
# THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF RODRIGO BOTAFOGO HAS BEEN ADVISED OF THE
|
14
|
+
# POSSIBILITY OF SUCH DAMAGE.
|
15
|
+
#
|
16
|
+
# RODRIGO BOTAFOGO SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
|
17
|
+
# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE
|
18
|
+
# SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED HEREUNDER IS PROVIDED "AS IS".
|
19
|
+
# RODRIGO BOTAFOGO HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS,
|
20
|
+
# OR MODIFICATIONS.
|
21
|
+
##########################################################################################
|
22
|
+
|
23
|
+
require 'test/unit'
|
24
|
+
require 'shoulda'
|
25
|
+
|
26
|
+
require_relative '../config'
|
27
|
+
|
28
|
+
require 'jcsv'
|
29
|
+
|
30
|
+
class CSVTest < Test::Unit::TestCase
|
31
|
+
|
32
|
+
context "CSV test" do
|
33
|
+
|
34
|
+
setup do
|
35
|
+
|
36
|
+
end
|
37
|
+
|
38
|
+
#-------------------------------------------------------------------------------------
|
39
|
+
# Read data into a flat map. Allows random access to the data by use of the map
|
40
|
+
# 'key'. The 'key' is a string that concatenates the values of the dimensions'
|
41
|
+
# labels with a '.'.
|
42
|
+
#-------------------------------------------------------------------------------------
|
43
|
+
|
44
|
+
should "read data into flat map" do
|
45
|
+
|
46
|
+
reader = Jcsv.reader("../data/epilepsy.csv", format: :map,
|
47
|
+
dimensions: [:treatment, :subject, :period],
|
48
|
+
default_filter: Jcsv.int)
|
49
|
+
|
50
|
+
# reader.filters = {:treatment => Jcsv.string}
|
51
|
+
|
52
|
+
# remove the :patient field from the data, as this field is already given by the
|
53
|
+
# :subject field.
|
54
|
+
reader.mapping = {:patient => false}
|
55
|
+
|
56
|
+
# read all the data into a flat map (hash) with keys the dimensions values
|
57
|
+
# concatenated with '.'.
|
58
|
+
treatment = reader.read
|
59
|
+
# p treatment
|
60
|
+
|
61
|
+
assert_equal(11, treatment["placebo.1.1"][:base])
|
62
|
+
assert_equal(31, treatment["placebo.1.1"][:age])
|
63
|
+
assert_equal(5, treatment["placebo.1.1"][:"seizure.rate"])
|
64
|
+
|
65
|
+
assert_equal(31, treatment["Progabide.35.2"][:base])
|
66
|
+
assert_equal(30, treatment["Progabide.35.2"][:age])
|
67
|
+
assert_equal(17, treatment["Progabide.35.2"][:"seizure.rate"])
|
68
|
+
|
69
|
+
end
|
70
|
+
#=begin
|
71
|
+
#-------------------------------------------------------------------------------------
|
72
|
+
# Read data into a flat map in chunks
|
73
|
+
#-------------------------------------------------------------------------------------
|
74
|
+
|
75
|
+
should "read data into flat map in chunks" do
|
76
|
+
|
77
|
+
# parameter deep_map: is not passed. By default it is false
|
78
|
+
reader = Jcsv.reader("../data/epilepsy.csv", format: :map, chunk_size: 20,
|
79
|
+
dimensions: [:treatment, :subject, :period],
|
80
|
+
default_filter: Jcsv.int)
|
81
|
+
|
82
|
+
# remove the :patient field from the data, as this field is already given by the
|
83
|
+
# :subject field.
|
84
|
+
reader.mapping = {:patient => false}
|
85
|
+
treatment = reader.read
|
86
|
+
# p treatment
|
87
|
+
|
88
|
+
treatment_type = reader.dimensions[:treatment]
|
89
|
+
subject = reader.dimensions[:subject]
|
90
|
+
period = reader.dimensions[:period]
|
91
|
+
|
92
|
+
# variable labels has all dimension labels
|
93
|
+
assert_equal(0, treatment_type.labels["placebo"])
|
94
|
+
assert_equal(1, treatment_type.labels["Progabide"])
|
95
|
+
assert_equal(1, subject.labels["2"])
|
96
|
+
assert_equal(13, subject.labels["14"])
|
97
|
+
assert_equal(58, subject.labels["59"])
|
98
|
+
assert_equal(0, period.labels["1"])
|
99
|
+
assert_equal(3, period.labels["4"])
|
100
|
+
|
101
|
+
# we now need to access the first chunk [0] to get to the desired element
|
102
|
+
assert_equal(11, treatment[0]["placebo.1.1"][:base])
|
103
|
+
assert_equal(31, treatment[0]["placebo.1.1"][:age])
|
104
|
+
assert_equal(5, treatment[0]["placebo.1.1"][:"seizure.rate"])
|
105
|
+
|
106
|
+
# chunk [0] does not have key "Progabide.35.2"
|
107
|
+
assert_equal(nil, treatment[0]["Progabide.35.2"])
|
108
|
+
|
109
|
+
assert_equal(10, treatment[6]["Progabide.32.3"][:base])
|
110
|
+
assert_equal(30, treatment[6]["Progabide.32.3"][:age])
|
111
|
+
assert_equal(1, treatment[6]["Progabide.32.3"][:"seizure.rate"])
|
112
|
+
|
113
|
+
end
|
114
|
+
|
115
|
+
#-------------------------------------------------------------------------------------
|
116
|
+
#
|
117
|
+
#-------------------------------------------------------------------------------------
|
118
|
+
|
119
|
+
should "read to map in enumerable chunks" do
|
120
|
+
|
121
|
+
# parameter deep_map: is not passed. By default it is false
|
122
|
+
reader = Jcsv.reader("../data/epilepsy.csv", format: :map, chunk_size: 20,
|
123
|
+
dimensions: [:treatment, :subject, :period],
|
124
|
+
default_filter: Jcsv.int)
|
125
|
+
|
126
|
+
# Method each without a block returns an enumerator
|
127
|
+
enum = reader.each
|
128
|
+
|
129
|
+
# read the first chunk. Chunk is of size 20
|
130
|
+
chunk = enum.next
|
131
|
+
data = chunk[2]
|
132
|
+
|
133
|
+
# in this case, only the first 20 rows were read, so only one treatment and six
|
134
|
+
# subjects were read until this point
|
135
|
+
assert_equal(1, reader.dimensions[:treatment].size)
|
136
|
+
# assert_equal(6, reader.dimensions[:subject].size)
|
137
|
+
|
138
|
+
assert_equal(8, data["placebo.4.4"][:base])
|
139
|
+
assert_equal(36, data["placebo.4.4"][:age])
|
140
|
+
assert_equal(4, data["placebo.4.4"][:"seizure.rate"])
|
141
|
+
|
142
|
+
# read the next chunk. Chunk is of size 20
|
143
|
+
chunk = enum.next
|
144
|
+
|
145
|
+
# read the next chunk... not interested in the second chunk for some reason...
|
146
|
+
chunk = enum.next
|
147
|
+
data = chunk[2]
|
148
|
+
|
149
|
+
# As we read new chunks of data, the dimensions labels accumulate, i.e., they are
|
150
|
+
# not erased between reads of every chunk (call to the next function). Dimensions
|
151
|
+
# are variables from the reader and not the chunk.
|
152
|
+
assert_equal(1, reader.dimensions[:treatment].size)
|
153
|
+
assert_equal(16, reader.dimensions[:subject].size)
|
154
|
+
|
155
|
+
assert_equal(33, data["placebo.12.2"][:base])
|
156
|
+
assert_equal(24, data["placebo.12.2"][:age])
|
157
|
+
assert_equal(6, data["placebo.12.2"][:"seizure.rate"])
|
158
|
+
|
159
|
+
end
|
160
|
+
|
161
|
+
#-------------------------------------------------------------------------------------
|
162
|
+
#
|
163
|
+
#-------------------------------------------------------------------------------------
|
164
|
+
|
165
|
+
should "read to map and pass to block with dimensions" do
|
166
|
+
|
167
|
+
# parameter deep_map: is not passed. By default it is false
|
168
|
+
reader = Jcsv.reader("../data/epilepsy.csv", format: :map,
|
169
|
+
dimensions: [:treatment, :subject, :period],
|
170
|
+
default_filter: Jcsv.int)
|
171
|
+
|
172
|
+
reader.read do |line_no, row_no, row|
|
173
|
+
assert_equal(1, row.keys.size)
|
174
|
+
end
|
175
|
+
|
176
|
+
end
|
177
|
+
|
178
|
+
#-------------------------------------------------------------------------------------
|
179
|
+
#
|
180
|
+
#-------------------------------------------------------------------------------------
|
181
|
+
|
182
|
+
should "read to map and pass to block with dimensions, chunk_size > 1" do
|
183
|
+
|
184
|
+
# parameter deep_map: is not passed. By default it is false
|
185
|
+
reader = Jcsv.reader("../data/epilepsy.csv", format: :map, chunk_size: 20,
|
186
|
+
dimensions: [:treatment, :subject, :period],
|
187
|
+
default_filter: Jcsv.int)
|
188
|
+
|
189
|
+
reader.read do |line_no, row_no, row|
|
190
|
+
assert_equal(20, row.keys.size) if line_no < 230
|
191
|
+
end
|
192
|
+
|
193
|
+
end
|
194
|
+
|
195
|
+
#-------------------------------------------------------------------------------------
|
196
|
+
#
|
197
|
+
#-------------------------------------------------------------------------------------
|
198
|
+
|
199
|
+
should "raise error if mapping a column to true" do
|
200
|
+
|
201
|
+
reader = Jcsv.reader("../data/epilepsy.csv", format: :map, chunk_size: :all,
|
202
|
+
dimensions: [:subject, :period],
|
203
|
+
default_filter: Jcsv.int)
|
204
|
+
|
205
|
+
# Raises an error, since mapping to true is not defined
|
206
|
+
assert_raise ( ArgumentError ) { reader.mapping =
|
207
|
+
{:treatment => false, :patient => true} }
|
208
|
+
|
209
|
+
end
|
210
|
+
#=end
|
211
|
+
#-------------------------------------------------------------------------------------
|
212
|
+
#
|
213
|
+
#-------------------------------------------------------------------------------------
|
214
|
+
=begin
|
215
|
+
should "raise exception when dimensions are out of order (slower moving to the left)" do
|
216
|
+
|
217
|
+
reader = Jcsv.reader("epilepsy.csv", format: :map, dimensions: [:period, :subject],
|
218
|
+
default_filter: Jcsv.int)
|
219
|
+
|
220
|
+
reader.mapping = {:treatment => false, :patient => false}
|
221
|
+
|
222
|
+
assert_raise ( RuntimeError ) { treatment = reader.read[0] }
|
223
|
+
# p treatment["1"]
|
224
|
+
# p treatment["2"]
|
225
|
+
|
226
|
+
end
|
227
|
+
=end
|
228
|
+
|
229
|
+
end
|
230
|
+
|
231
|
+
end
|
@@ -0,0 +1,79 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
|
3
|
+
##########################################################################################
|
4
|
+
# Copyright © 2015 Rodrigo Botafogo. All Rights Reserved. Permission to use, copy, modify,
|
5
|
+
# and distribute this software and its documentation for educational, research, and
|
6
|
+
# not-for-profit purposes, without fee and without a signed licensing agreement, is hereby
|
7
|
+
# granted, provided that the above copyright notice, this paragraph and the following two
|
8
|
+
# paragraphs appear in all copies, modifications, and distributions. Contact Rodrigo
|
9
|
+
# Botafogo - rodrigo.a.botafogo@gmail.com for commercial licensing opportunities.
|
10
|
+
#
|
11
|
+
# IN NO EVENT SHALL RODRIGO BOTAFOGO BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL,
|
12
|
+
# INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF
|
13
|
+
# THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF RODRIGO BOTAFOGO HAS BEEN ADVISED OF THE
|
14
|
+
# POSSIBILITY OF SUCH DAMAGE.
|
15
|
+
#
|
16
|
+
# RODRIGO BOTAFOGO SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
|
17
|
+
# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE
|
18
|
+
# SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED HEREUNDER IS PROVIDED "AS IS".
|
19
|
+
# RODRIGO BOTAFOGO HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS,
|
20
|
+
# OR MODIFICATIONS.
|
21
|
+
##########################################################################################
|
22
|
+
|
23
|
+
require 'rubygems'
|
24
|
+
require 'test/unit'
|
25
|
+
require 'shoulda'
|
26
|
+
|
27
|
+
require_relative '../config'
|
28
|
+
|
29
|
+
require 'jcsv'
|
30
|
+
|
31
|
+
class CSVTest < Test::Unit::TestCase
|
32
|
+
|
33
|
+
context "CSV test" do
|
34
|
+
|
35
|
+
setup do
|
36
|
+
|
37
|
+
end
|
38
|
+
|
39
|
+
#-------------------------------------------------------------------------------------
|
40
|
+
#
|
41
|
+
#-------------------------------------------------------------------------------------
|
42
|
+
|
43
|
+
should "parse the example csv file with filters" do
|
44
|
+
|
45
|
+
# Add filters, to filter the columns according to given rules. numberOfKids is
|
46
|
+
# optional and should be converted to an int. married is optional and should be
|
47
|
+
# converted to a boolean
|
48
|
+
parser = Jcsv.new("example.csv", headers: true, comment_starts: "#")
|
49
|
+
parser.filters = {"Year" => Jcsv.int,
|
50
|
+
"Price" => Jcsv.double}
|
51
|
+
|
52
|
+
parser.read do |line_no, row_no, row, headers|
|
53
|
+
p row
|
54
|
+
end
|
55
|
+
|
56
|
+
end
|
57
|
+
|
58
|
+
#-------------------------------------------------------------------------------------
|
59
|
+
#
|
60
|
+
#-------------------------------------------------------------------------------------
|
61
|
+
|
62
|
+
should "parse example to map" do
|
63
|
+
|
64
|
+
# type is :map. Rows are hashes. Set the default filter to not_nil. That is, all
|
65
|
+
# fields are required unless explicitly set to optional.
|
66
|
+
parser = Jcsv.new("example.csv", type: :map, default_filter: Jcsv.not_nil,
|
67
|
+
headers: true, comment_starts: "#")
|
68
|
+
|
69
|
+
|
70
|
+
parser.read do |line_no, row_no, row, headers|
|
71
|
+
p row
|
72
|
+
end
|
73
|
+
|
74
|
+
|
75
|
+
end
|
76
|
+
|
77
|
+
end
|
78
|
+
|
79
|
+
end
|
@@ -0,0 +1,374 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
|
3
|
+
##########################################################################################
|
4
|
+
# Copyright © 2015 Rodrigo Botafogo. All Rights Reserved. Permission to use, copy, modify,
|
5
|
+
# and distribute this software and its documentation for educational, research, and
|
6
|
+
# not-for-profit purposes, without fee and without a signed licensing agreement, is hereby
|
7
|
+
# granted, provided that the above copyright notice, this paragraph and the following two
|
8
|
+
# paragraphs appear in all copies, modifications, and distributions. Contact Rodrigo
|
9
|
+
# Botafogo - rodrigo.a.botafogo@gmail.com for commercial licensing opportunities.
|
10
|
+
#
|
11
|
+
# IN NO EVENT SHALL RODRIGO BOTAFOGO BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL,
|
12
|
+
# INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF
|
13
|
+
# THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF RODRIGO BOTAFOGO HAS BEEN ADVISED OF THE
|
14
|
+
# POSSIBILITY OF SUCH DAMAGE.
|
15
|
+
#
|
16
|
+
# RODRIGO BOTAFOGO SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
|
17
|
+
# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE
|
18
|
+
# SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED HEREUNDER IS PROVIDED "AS IS".
|
19
|
+
# RODRIGO BOTAFOGO HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS,
|
20
|
+
# OR MODIFICATIONS.
|
21
|
+
##########################################################################################
|
22
|
+
|
23
|
+
require 'rubygems'
|
24
|
+
require 'test/unit'
|
25
|
+
require 'shoulda'
|
26
|
+
require 'matrix'
|
27
|
+
require 'mdarray'
|
28
|
+
|
29
|
+
require_relative '../config'
|
30
|
+
|
31
|
+
require 'jcsv'
|
32
|
+
|
33
|
+
class CSVTest < Test::Unit::TestCase
|
34
|
+
|
35
|
+
context "CSV test" do
|
36
|
+
|
37
|
+
setup do
|
38
|
+
|
39
|
+
end
|
40
|
+
|
41
|
+
#-------------------------------------------------------------------------------------
|
42
|
+
#
|
43
|
+
#-------------------------------------------------------------------------------------
|
44
|
+
=begin
|
45
|
+
should "work with DecimalFormatSymbols" do
|
46
|
+
|
47
|
+
dfs = DecimalFormatSymbols.new
|
48
|
+
p dfs.currency_symbol
|
49
|
+
p dfs.decimal_separator.chr
|
50
|
+
p dfs.digit.chr
|
51
|
+
p dfs.exponent_separator
|
52
|
+
p dfs.grouping_separator.chr
|
53
|
+
p dfs.infinity
|
54
|
+
# Returns the ISO 4217 currency code of the currency of these DecimalFormatSymbols.
|
55
|
+
p dfs.international_currency_symbol
|
56
|
+
p dfs.minus_sign.chr
|
57
|
+
p dfs.monetary_decimal_separator.chr
|
58
|
+
p dfs.getNaN
|
59
|
+
# Gets the character used to separate positive and negative subpatterns in a pattern.
|
60
|
+
# p pattern_separator.chr
|
61
|
+
# Gets the character used for percent sign.
|
62
|
+
p dfs.percent.chr
|
63
|
+
# Gets the character used for per mille sign.
|
64
|
+
p dfs.per_mill
|
65
|
+
# Gets the character used for zero.
|
66
|
+
p dfs.zero_digit.chr
|
67
|
+
|
68
|
+
end
|
69
|
+
|
70
|
+
#-------------------------------------------------------------------------------------
|
71
|
+
#
|
72
|
+
#-------------------------------------------------------------------------------------
|
73
|
+
|
74
|
+
should "work with Locales" do
|
75
|
+
|
76
|
+
locale = Jcsv::Locale.default
|
77
|
+
puts "Your locale country is: #{locale.display_country}"
|
78
|
+
|
79
|
+
# Switch default locale to France, so display_country will be in French.
|
80
|
+
locale = Jcsv::Locale.default = Jcsv::Locale::FRANCE
|
81
|
+
assert_equal("français", locale.display_language)
|
82
|
+
assert_equal("France", locale.display_country)
|
83
|
+
|
84
|
+
# Create a new locale, but default is still France, so output is in French.
|
85
|
+
loc2 = Jcsv::Locale.new(language: "en", country: "US")
|
86
|
+
assert_equal("en-US", loc2.to_language_tag)
|
87
|
+
assert_equal("US", loc2.country)
|
88
|
+
assert_equal("Etats-Unis", loc2.display_country)
|
89
|
+
|
90
|
+
locale = Jcsv::Locale::US
|
91
|
+
p locale
|
92
|
+
end
|
93
|
+
=end
|
94
|
+
#-------------------------------------------------------------------------------------
|
95
|
+
#
|
96
|
+
#-------------------------------------------------------------------------------------
|
97
|
+
|
98
|
+
should "check all filters" do
|
99
|
+
|
100
|
+
reader = Jcsv.reader("../data/filters.csv", format: :map, col_sep: ";",
|
101
|
+
comment_starts: "#")
|
102
|
+
|
103
|
+
bool = Jcsv.bool(true_values: ["sim", "s", "verdadeiro", "v"],
|
104
|
+
false_values: ["nao", "n", "falso", "f"])
|
105
|
+
|
106
|
+
# supports int and long filters, but in Ruby it is better to use the fixnum
|
107
|
+
# filter
|
108
|
+
reader.filters = {
|
109
|
+
:int => Jcsv.int >> Jcsv.in_range(200, 300),
|
110
|
+
:double => Jcsv.float,
|
111
|
+
:double2 => Jcsv.float(Jcsv::Locale::US),
|
112
|
+
:long => Jcsv.long,
|
113
|
+
:complex => Jcsv.complex,
|
114
|
+
:rational => Jcsv.rational,
|
115
|
+
:big_num => Jcsv.bignum,
|
116
|
+
:big_decimal => Jcsv.big_decimal(Jcsv::Locale::US),
|
117
|
+
:big_decimal2 => Jcsv.big_decimal(Jcsv::Locale::BRAZIL),
|
118
|
+
:big_decimal3 => Jcsv.big_decimal(Jcsv::Locale::BRAZIL),
|
119
|
+
:truth1 => Jcsv.bool,
|
120
|
+
:truth2 => bool,
|
121
|
+
:truth3 => bool,
|
122
|
+
:name => Jcsv.in_range("P", "Q"),
|
123
|
+
:ip1 => Jcsv.ipaddr >> Jcsv.dynamic { |val| val.to_i } }
|
124
|
+
|
125
|
+
filters = reader.read[0]
|
126
|
+
p filters
|
127
|
+
|
128
|
+
end
|
129
|
+
|
130
|
+
#-------------------------------------------------------------------------------------
|
131
|
+
#
|
132
|
+
#-------------------------------------------------------------------------------------
|
133
|
+
|
134
|
+
should "parse dates" do
|
135
|
+
|
136
|
+
reader = Jcsv.reader("../data/dates.csv", format: :map, col_sep: ";",
|
137
|
+
comment_starts: "#")
|
138
|
+
|
139
|
+
reader.filters = {
|
140
|
+
:httpdate => Jcsv.httpdate(Date::JULIAN),
|
141
|
+
:iso8601_1 => Jcsv.iso8601(Date::ENGLAND),
|
142
|
+
:iso8601_2 => Jcsv.iso8601(Date::GREGORIAN),
|
143
|
+
:iso8601_3 => Jcsv.iso8601(Date::ITALY), # Date::ITALY is the default start date
|
144
|
+
:jd_1 => Jcsv.int >> Jcsv.jd,
|
145
|
+
:jisx0301 => Jcsv.jisx0301,
|
146
|
+
:date1 => Jcsv.date,
|
147
|
+
:date2 => Jcsv.date,
|
148
|
+
:date3 => Jcsv.date,
|
149
|
+
:rfc2822 => Jcsv.rfc2822,
|
150
|
+
:rfc3339 => Jcsv.rfc3339,
|
151
|
+
:rfc822 => Jcsv.rfc822,
|
152
|
+
:ptime1 => Jcsv.strptime('%Y-%m-%dT%H:%M:%S%z'),
|
153
|
+
:xmlschema => Jcsv.xmlschema }
|
154
|
+
|
155
|
+
filters = reader.read[0]
|
156
|
+
|
157
|
+
assert_equal(DateTime.httpdate('Sat, 03 Feb 2001 04:05:06 GMT', Date::JULIAN),
|
158
|
+
filters[:httpdate])
|
159
|
+
assert_equal(DateTime.iso8601('2001-02-03T04:05:06+07:00', Date::ENGLAND),
|
160
|
+
filters[:iso8601_1])
|
161
|
+
assert_equal(DateTime.iso8601('20010203T040506+0700', Date::GREGORIAN),
|
162
|
+
filters[:iso8601_2])
|
163
|
+
assert_equal(DateTime.iso8601('2001-W05-6T04:05:06+07:00'), filters[:iso8601_3])
|
164
|
+
assert_equal(DateTime.jd(2451944), filters[:jd_1])
|
165
|
+
assert_equal(DateTime.jisx0301('H13.02.03T04:05:06+07:00'), filters[:jisx0301])
|
166
|
+
assert_equal(DateTime.parse('2001-02-03T04:05:06+07:00'), filters[:date1])
|
167
|
+
assert_equal(DateTime.parse('20010203T040506+0700'), filters[:date2])
|
168
|
+
assert_equal(DateTime.parse('3rd Feb 2001 04:05:06 PM'), filters[:date3])
|
169
|
+
assert_equal(DateTime.rfc2822('Sat, 3 Feb 2001 04:05:06 +0700'), filters[:rfc2822])
|
170
|
+
assert_equal(DateTime.rfc3339('2001-02-03T04:05:06+07:00'), filters[:rfc3339])
|
171
|
+
assert_equal(DateTime.rfc822('Sat, 3 Feb 2001 04:05:06 +0700'), filters[:rfc822])
|
172
|
+
assert_equal(DateTime.strptime('2001-02-03T04:05:06+07:00', '%Y-%m-%dT%H:%M:%S%z'),
|
173
|
+
filters[:ptime1])
|
174
|
+
assert_equal(DateTime.xmlschema('2001-02-03T04:05:06+07:00'), filters[:xmlschema])
|
175
|
+
|
176
|
+
end
|
177
|
+
|
178
|
+
#-------------------------------------------------------------------------------------
|
179
|
+
#
|
180
|
+
#-------------------------------------------------------------------------------------
|
181
|
+
|
182
|
+
should "filter data onto a collector" do
|
183
|
+
|
184
|
+
# type is :map. Rows are hashes. Set the default filter to not_nil. That is, all
|
185
|
+
# fields are required unless explicitly set to optional.
|
186
|
+
reader = Jcsv.reader("../data/customer.csv", format: :map, chunk_size: 2)
|
187
|
+
|
188
|
+
first_names = Jcsv.collector
|
189
|
+
last_names = Jcsv.collector
|
190
|
+
kids = Jcsv.collector
|
191
|
+
|
192
|
+
reader.filters = {
|
193
|
+
:first_name => first_names,
|
194
|
+
:last_name => last_names,
|
195
|
+
:number_of_kids => Jcsv.convert_nil_to(-1) >> Jcsv.fixnum >> kids
|
196
|
+
}
|
197
|
+
|
198
|
+
map = reader.read
|
199
|
+
assert_equal(["John", "Bob", "Alice", "Bill"], first_names.collection)
|
200
|
+
assert_equal(["Dunbar", "Down", "Wunderland", "Jobs"], last_names.collection)
|
201
|
+
assert_equal([-1, 0, 0, 3], kids.collection)
|
202
|
+
|
203
|
+
end
|
204
|
+
|
205
|
+
#-------------------------------------------------------------------------------------
|
206
|
+
#
|
207
|
+
#-------------------------------------------------------------------------------------
|
208
|
+
|
209
|
+
should "accept optional fields" do
|
210
|
+
|
211
|
+
reader = Jcsv.reader("../data/customer.csv", format: :map, chunk_size: 2,
|
212
|
+
default_filter: Jcsv.not_nil)
|
213
|
+
reader.filters = {
|
214
|
+
:number_of_kids => Jcsv.optional >> Jcsv.fixnum,
|
215
|
+
:married => Jcsv.optional
|
216
|
+
}
|
217
|
+
map = reader.read
|
218
|
+
# p map
|
219
|
+
|
220
|
+
end
|
221
|
+
|
222
|
+
#-------------------------------------------------------------------------------------
|
223
|
+
#
|
224
|
+
#-------------------------------------------------------------------------------------
|
225
|
+
|
226
|
+
should "work with dynamic filter" do
|
227
|
+
|
228
|
+
reader = Jcsv.reader("../data/BJsales.csv", format: :map)
|
229
|
+
|
230
|
+
rate = 3.75 # dollar to reais conversion rate
|
231
|
+
|
232
|
+
reader.filters = {
|
233
|
+
:b_jsales => Jcsv.optional >> Jcsv.float(Jcsv::Locale::US) >>
|
234
|
+
Jcsv.in_range(0, 300) >> Jcsv.dynamic { |value| value * rate }
|
235
|
+
}
|
236
|
+
|
237
|
+
map = reader.read
|
238
|
+
assert_equal(200.1 * rate, map[0][:b_jsales])
|
239
|
+
assert_equal(199.5 * rate, map[1][:b_jsales])
|
240
|
+
assert_equal(199.4 * rate, map[2][:b_jsales])
|
241
|
+
assert_equal(198.9 * rate, map[3][:b_jsales])
|
242
|
+
|
243
|
+
end
|
244
|
+
|
245
|
+
#-------------------------------------------------------------------------------------
|
246
|
+
#
|
247
|
+
#-------------------------------------------------------------------------------------
|
248
|
+
|
249
|
+
should "create new filters" do
|
250
|
+
|
251
|
+
reader = Jcsv.reader("../data/BJsales.csv", format: :map)
|
252
|
+
|
253
|
+
class RangeFilter < Jcsv::Filter
|
254
|
+
|
255
|
+
def initialize(start, final)
|
256
|
+
@start = start
|
257
|
+
@final = final
|
258
|
+
super()
|
259
|
+
end
|
260
|
+
|
261
|
+
def execute(value, context)
|
262
|
+
# check the constraint and raise a ConstraintViolation if the value is out of range
|
263
|
+
raise Jcsv::ConstraintViolation, "value not in range in #{context}" if
|
264
|
+
value < @start || value > @final
|
265
|
+
# Call next filter
|
266
|
+
exec_next(value, context)
|
267
|
+
end
|
268
|
+
|
269
|
+
end
|
270
|
+
|
271
|
+
rate = 3.75 # dollar to reais conversion rate
|
272
|
+
|
273
|
+
reader.filters = {
|
274
|
+
:b_jsales => Jcsv.optional >> Jcsv.float(Jcsv::Locale::US) >>
|
275
|
+
RangeFilter.new(0, 200) >> Jcsv.dynamic { |value| value * rate }
|
276
|
+
}
|
277
|
+
|
278
|
+
assert_raise ( Jcsv::ConstraintViolation ) { map = reader.read }
|
279
|
+
|
280
|
+
end
|
281
|
+
|
282
|
+
#-------------------------------------------------------------------------------------
|
283
|
+
#
|
284
|
+
#-------------------------------------------------------------------------------------
|
285
|
+
|
286
|
+
should "gsub strings" do
|
287
|
+
|
288
|
+
reader = Jcsv.reader("../data/customer.csv", format: :map)
|
289
|
+
|
290
|
+
reader.filters = {
|
291
|
+
:mailing_address => Jcsv.gsub(/[eao]/, 'e' => 3, 'o' => '*', 'a' => 'A'),
|
292
|
+
:first_name => Jcsv.gsub(/[aeiou]/, '*'),
|
293
|
+
:last_name => Jcsv.gsub(/([aeiou])/, '<\1>'),
|
294
|
+
:customer_no => Jcsv.gsub(/./) {|s| s.ord.to_s + ' '} >> Jcsv.fixnum
|
295
|
+
}
|
296
|
+
|
297
|
+
map = reader.read
|
298
|
+
|
299
|
+
assert_equal("J*hn", map[0][:first_name])
|
300
|
+
assert_equal("D<u>nb<a>r", map[0][:last_name])
|
301
|
+
assert_equal(49, map[0][:customer_no])
|
302
|
+
assert_equal("1600 Amphith3Atr3 PArkwAy\nM*untAin Vi3w, CA 94043\nUnit3d StAt3s",
|
303
|
+
map[0][:mailing_address])
|
304
|
+
assert_equal("Al*c*", map[2][:first_name])
|
305
|
+
assert_equal("D<o>wn", map[1][:last_name])
|
306
|
+
|
307
|
+
end
|
308
|
+
|
309
|
+
#-------------------------------------------------------------------------------------
|
310
|
+
#
|
311
|
+
#-------------------------------------------------------------------------------------
|
312
|
+
|
313
|
+
should "process with any string functions" do
|
314
|
+
|
315
|
+
reader = Jcsv.reader("../data/customer.csv", format: :map)
|
316
|
+
|
317
|
+
reader.filters = {
|
318
|
+
:mailing_address => Jcsv.str(:[], 0, 10),
|
319
|
+
:first_name => Jcsv.str(:delete, "aeiou"),
|
320
|
+
:last_name => Jcsv.str(:partition, "n"),
|
321
|
+
:favourite_quote => Jcsv.str(:reverse) >> Jcsv.str(:prepend, "rev: ") >>
|
322
|
+
Jcsv.str(:[], 0, 20),
|
323
|
+
:email => Jcsv.str(:gsub, /[eao]/, 'e' => 3, 'o' => '*', 'a' => 'A')
|
324
|
+
}
|
325
|
+
|
326
|
+
map = reader.read
|
327
|
+
assert_equal("Jhn", map[0][:first_name])
|
328
|
+
assert_equal(["Du", "n", "bar"], map[0][:last_name])
|
329
|
+
assert_equal("1600 Amphi", map[0][:mailing_address])
|
330
|
+
assert_equal("rev: sraW ratS - \".u", map[0][:favourite_quote])
|
331
|
+
assert_equal("jdunbAr@gmAil.c*m", map[0][:email])
|
332
|
+
|
333
|
+
end
|
334
|
+
|
335
|
+
#-------------------------------------------------------------------------------------
|
336
|
+
#
|
337
|
+
#-------------------------------------------------------------------------------------
|
338
|
+
|
339
|
+
should "raise errors on contraints" do
|
340
|
+
|
341
|
+
reader = Jcsv.reader("../data/city.csv", format: :map, col_sep: ";")
|
342
|
+
|
343
|
+
reader.filters = {
|
344
|
+
# :city => Jcsv.equals
|
345
|
+
:city => Jcsv.not_ascii?
|
346
|
+
# :city => Jcsv.end_with?("ka")
|
347
|
+
}
|
348
|
+
|
349
|
+
phones = reader.read
|
350
|
+
# p phones
|
351
|
+
|
352
|
+
end
|
353
|
+
|
354
|
+
#-------------------------------------------------------------------------------------
|
355
|
+
#
|
356
|
+
#-------------------------------------------------------------------------------------
|
357
|
+
|
358
|
+
should "raise error if substring found" do
|
359
|
+
|
360
|
+
reader = Jcsv.reader("../data/customer.csv", format: :map)
|
361
|
+
|
362
|
+
reader.filters = {
|
363
|
+
:first_name => Jcsv.forbid_substrings(["jh", "paw", "Jp"]),
|
364
|
+
:last_name => Jcsv.is_element_of(["Dunbar", "Down", "Wunderland", "Jobs"])
|
365
|
+
}
|
366
|
+
|
367
|
+
customers = reader.read
|
368
|
+
# p customers
|
369
|
+
|
370
|
+
end
|
371
|
+
|
372
|
+
end
|
373
|
+
|
374
|
+
end
|