csvreader 1.2.4 → 1.2.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/{HISTORY.md → CHANGELOG.md} +3 -3
- data/Manifest.txt +1 -2
- data/README.md +682 -682
- data/Rakefile +33 -32
- data/datasets/cars11.csv +10 -10
- data/datasets/cities11.csv +12 -12
- data/datasets/customers11.csv +13 -13
- data/datasets/iris.attrib.csv +25 -25
- data/datasets/iris11.csv +163 -163
- data/datasets/lcc.attrib.csv +14 -14
- data/datasets/shakespeare.csv +9 -9
- data/lib/csvreader/base.rb +6 -2
- data/lib/csvreader/buffer.rb +0 -1
- data/lib/csvreader/builder.rb +0 -1
- data/lib/csvreader/converter.rb +0 -1
- data/lib/csvreader/parser.rb +32 -33
- data/lib/csvreader/parser_fixed.rb +105 -106
- data/lib/csvreader/parser_json.rb +23 -24
- data/lib/csvreader/parser_std.rb +582 -583
- data/lib/csvreader/parser_strict.rb +290 -291
- data/lib/csvreader/parser_tab.rb +22 -23
- data/lib/csvreader/parser_table.rb +122 -123
- data/lib/csvreader/parser_yaml.rb +23 -24
- data/lib/csvreader/reader.rb +2 -3
- data/lib/csvreader/reader_hash.rb +1 -2
- data/lib/csvreader/version.rb +30 -32
- data/lib/csvreader.rb +0 -1
- data/test/test_parser_formats.rb +66 -66
- data/test/test_parser_java.rb +208 -208
- metadata +18 -15
- data/LICENSE.md +0 -116
data/Rakefile
CHANGED
@@ -1,32 +1,33 @@
|
|
1
|
-
require 'hoe'
|
2
|
-
require './lib/csvreader/version.rb'
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
self.
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
self.
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
self.
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
['
|
23
|
-
['
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
1
|
+
require 'hoe'
|
2
|
+
require './lib/csvreader/version.rb'
|
3
|
+
|
4
|
+
|
5
|
+
Hoe.spec 'csvreader' do
|
6
|
+
|
7
|
+
self.version = CsvReader::VERSION
|
8
|
+
|
9
|
+
self.summary = "csvreader - read tabular data in the comma-separated values (csv) format the right way (uses best practices out-of-the-box with zero-configuration)"
|
10
|
+
self.description = summary
|
11
|
+
|
12
|
+
self.urls = { home: 'https://github.com/csvreader/csvreader' }
|
13
|
+
|
14
|
+
self.author = 'Gerald Bauer'
|
15
|
+
self.email = 'wwwmake@googlegroups.com'
|
16
|
+
|
17
|
+
# switch extension to .markdown for github formatting
|
18
|
+
self.readme_file = 'README.md'
|
19
|
+
self.history_file = 'CHANGELOG.md'
|
20
|
+
|
21
|
+
self.extra_deps = [
|
22
|
+
['tabreader', '>=1.0.1'],
|
23
|
+
['csvyaml', '>=0.1.0'],
|
24
|
+
['csvjson', '>=1.0.0']
|
25
|
+
]
|
26
|
+
|
27
|
+
self.licenses = ['Public Domain']
|
28
|
+
|
29
|
+
self.spec_extras = {
|
30
|
+
required_ruby_version: '>= 2.2.2'
|
31
|
+
}
|
32
|
+
|
33
|
+
end
|
data/datasets/cars11.csv
CHANGED
@@ -1,10 +1,10 @@
|
|
1
|
-
#####
|
2
|
-
# csv sample from the wikipedia article "Comma-separated values"
|
3
|
-
# see en.wikipedia.org/wiki/Comma-separated_values
|
4
|
-
|
5
|
-
Year,Make,Model,Description,Price
|
6
|
-
1997, Ford, E350,"ac, abs, moon",3000.00
|
7
|
-
1999, Chevy, "Venture ""Extended Edition""","",4900.00
|
8
|
-
1999, Chevy, "Venture ""Extended Edition, Very Large""",,5000.00
|
9
|
-
1996, Jeep, Grand Cherokee,"MUST SELL!
|
10
|
-
air, moon roof, loaded",4799.00
|
1
|
+
#####
|
2
|
+
# csv sample from the wikipedia article "Comma-separated values"
|
3
|
+
# see en.wikipedia.org/wiki/Comma-separated_values
|
4
|
+
|
5
|
+
Year,Make,Model,Description,Price
|
6
|
+
1997, Ford, E350,"ac, abs, moon",3000.00
|
7
|
+
1999, Chevy, "Venture ""Extended Edition""","",4900.00
|
8
|
+
1999, Chevy, "Venture ""Extended Edition, Very Large""",,5000.00
|
9
|
+
1996, Jeep, Grand Cherokee,"MUST SELL!
|
10
|
+
air, moon roof, loaded",4799.00
|
data/datasets/cities11.csv
CHANGED
@@ -1,12 +1,12 @@
|
|
1
|
-
#####
|
2
|
-
# csv sample from the wikipedia article "Comma-separated values"
|
3
|
-
# see en.wikipedia.org/wiki/Comma-separated_values
|
4
|
-
#
|
5
|
-
# note:
|
6
|
-
# Double quote processing need only apply if the field starts
|
7
|
-
# with a double quote. Note, however, that double quotes are not
|
8
|
-
# allowed in unquoted fields according to RFC 4180
|
9
|
-
|
10
|
-
Los Angeles, 34°03'N, 118°15'W
|
11
|
-
New York City, 40°42'46"N, 74°00'21"W
|
12
|
-
Paris, 48°51'24"N, 2°21'03"E
|
1
|
+
#####
|
2
|
+
# csv sample from the wikipedia article "Comma-separated values"
|
3
|
+
# see en.wikipedia.org/wiki/Comma-separated_values
|
4
|
+
#
|
5
|
+
# note:
|
6
|
+
# Double quote processing need only apply if the field starts
|
7
|
+
# with a double quote. Note, however, that double quotes are not
|
8
|
+
# allowed in unquoted fields according to RFC 4180
|
9
|
+
|
10
|
+
Los Angeles, 34°03'N, 118°15'W
|
11
|
+
New York City, 40°42'46"N, 74°00'21"W
|
12
|
+
Paris, 48°51'24"N, 2°21'03"E
|
data/datasets/customers11.csv
CHANGED
@@ -1,13 +1,13 @@
|
|
1
|
-
#####
|
2
|
-
# csv sample from the article:
|
3
|
-
# A Guide to the Ruby CSV Library, Part I
|
4
|
-
# - sitepoint.com/guide-ruby-csv-library-part
|
5
|
-
|
6
|
-
Name,Times arrived,Total $ spent,Food feedback
|
7
|
-
Dan, 34, 2548, Lovin it!
|
8
|
-
Maria, 55, 5054, "Good, delicious food"
|
9
|
-
Carlos, 22, 4352, "I am ""pleased"", but could be better"
|
10
|
-
Stephany, 34, 6542, I want bigger steaks!!!!!
|
11
|
-
James, 1, 43, Not bad
|
12
|
-
Robin, 1, 56, Fish is tasty
|
13
|
-
Anna, 1, 79, "Good, better, the best!"
|
1
|
+
#####
|
2
|
+
# csv sample from the article:
|
3
|
+
# A Guide to the Ruby CSV Library, Part I
|
4
|
+
# - sitepoint.com/guide-ruby-csv-library-part
|
5
|
+
|
6
|
+
Name,Times arrived,Total $ spent,Food feedback
|
7
|
+
Dan, 34, 2548, Lovin it!
|
8
|
+
Maria, 55, 5054, "Good, delicious food"
|
9
|
+
Carlos, 22, 4352, "I am ""pleased"", but could be better"
|
10
|
+
Stephany, 34, 6542, I want bigger steaks!!!!!
|
11
|
+
James, 1, 43, Not bad
|
12
|
+
Robin, 1, 56, Fish is tasty
|
13
|
+
Anna, 1, 79, "Good, better, the best!"
|
data/datasets/iris.attrib.csv
CHANGED
@@ -1,25 +1,25 @@
|
|
1
|
-
% 1. Title: Iris Plants Database
|
2
|
-
%
|
3
|
-
% 2. Sources:
|
4
|
-
% (a) Creator: R.A. Fisher
|
5
|
-
|
6
|
-
|
7
|
-
@RELATION iris
|
8
|
-
|
9
|
-
@ATTRIBUTE sepallength NUMERIC
|
10
|
-
@ATTRIBUTE sepalwidth NUMERIC
|
11
|
-
@ATTRIBUTE petallength NUMERIC
|
12
|
-
@ATTRIBUTE petalwidth NUMERIC
|
13
|
-
@ATTRIBUTE class {Iris-setosa,Iris-versicolor,Iris-virginica}
|
14
|
-
|
15
|
-
@DATA
|
16
|
-
5.1,3.5,1.4,0.2,Iris-setosa
|
17
|
-
4.9,3.0,1.4,0.2,Iris-setosa
|
18
|
-
4.7,3.2,1.3,0.2,Iris-setosa
|
19
|
-
4.6,3.1,1.5,0.2,Iris-setosa
|
20
|
-
5.0,3.6,1.4,0.2,Iris-setosa
|
21
|
-
5.4,3.9,1.7,0.4,Iris-setosa
|
22
|
-
4.6,3.4,1.4,0.3,Iris-setosa
|
23
|
-
5.0,3.4,1.5,0.2,Iris-setosa
|
24
|
-
4.4,2.9,1.4,0.2,Iris-setosa
|
25
|
-
4.9,3.1,1.5,0.1,Iris-setosa
|
1
|
+
% 1. Title: Iris Plants Database
|
2
|
+
%
|
3
|
+
% 2. Sources:
|
4
|
+
% (a) Creator: R.A. Fisher
|
5
|
+
|
6
|
+
|
7
|
+
@RELATION iris
|
8
|
+
|
9
|
+
@ATTRIBUTE sepallength NUMERIC
|
10
|
+
@ATTRIBUTE sepalwidth NUMERIC
|
11
|
+
@ATTRIBUTE petallength NUMERIC
|
12
|
+
@ATTRIBUTE petalwidth NUMERIC
|
13
|
+
@ATTRIBUTE class {Iris-setosa,Iris-versicolor,Iris-virginica}
|
14
|
+
|
15
|
+
@DATA
|
16
|
+
5.1,3.5,1.4,0.2,Iris-setosa
|
17
|
+
4.9,3.0,1.4,0.2,Iris-setosa
|
18
|
+
4.7,3.2,1.3,0.2,Iris-setosa
|
19
|
+
4.6,3.1,1.5,0.2,Iris-setosa
|
20
|
+
5.0,3.6,1.4,0.2,Iris-setosa
|
21
|
+
5.4,3.9,1.7,0.4,Iris-setosa
|
22
|
+
4.6,3.4,1.4,0.3,Iris-setosa
|
23
|
+
5.0,3.4,1.5,0.2,Iris-setosa
|
24
|
+
4.4,2.9,1.4,0.2,Iris-setosa
|
25
|
+
4.9,3.1,1.5,0.1,Iris-setosa
|
data/datasets/iris11.csv
CHANGED
@@ -1,163 +1,163 @@
|
|
1
|
-
###
|
2
|
-
# The Iris flower data set or Fisher's Iris data set is a multivariate data set
|
3
|
-
# introduced by the British statistician and biologist Ronald Fisher in his 1936 paper
|
4
|
-
# The use of multiple measurements in taxonomic problems as an example of
|
5
|
-
# linear discriminant analysis.
|
6
|
-
# See https://en.wikipedia.org/wiki/Iris_flower_data_set
|
7
|
-
|
8
|
-
|
9
|
-
# The dataset contains a set of 150 records under five attributes
|
10
|
-
# - petal length, petal width, sepal length, sepal width and species.
|
11
|
-
|
12
|
-
|
13
|
-
Sepal length, Sepal width, Petal length, Petal width, Species
|
14
|
-
5.1, 3.5, 1.4, 0.2, I. setosa
|
15
|
-
4.9, 3.0, 1.4, 0.2, I. setosa
|
16
|
-
4.7, 3.2, 1.3, 0.2, I. setosa
|
17
|
-
4.6, 3.1, 1.5, 0.2, I. setosa
|
18
|
-
5.0, 3.6, 1.4, 0.3, I. setosa
|
19
|
-
5.4, 3.9, 1.7, 0.4, I. setosa
|
20
|
-
4.6, 3.4, 1.4, 0.3, I. setosa
|
21
|
-
5.0, 3.4, 1.5, 0.2, I. setosa
|
22
|
-
4.4, 2.9, 1.4, 0.2, I. setosa
|
23
|
-
4.9, 3.1, 1.5, 0.1, I. setosa
|
24
|
-
5.4, 3.7, 1.5, 0.2, I. setosa
|
25
|
-
4.8, 3.4, 1.6, 0.2, I. setosa
|
26
|
-
4.8, 3.0, 1.4, 0.1, I. setosa
|
27
|
-
4.3, 3.0, 1.1, 0.1, I. setosa
|
28
|
-
5.8, 4.0, 1.2, 0.2, I. setosa
|
29
|
-
5.7, 4.4, 1.5, 0.4, I. setosa
|
30
|
-
5.4, 3.9, 1.3, 0.4, I. setosa
|
31
|
-
5.1, 3.5, 1.4, 0.3, I. setosa
|
32
|
-
5.7, 3.8, 1.7, 0.3, I. setosa
|
33
|
-
5.1, 3.8, 1.5, 0.3, I. setosa
|
34
|
-
5.4, 3.4, 1.7, 0.2, I. setosa
|
35
|
-
5.1, 3.7, 1.5, 0.4, I. setosa
|
36
|
-
4.6, 3.6, 1.0, 0.2, I. setosa
|
37
|
-
5.1, 3.3, 1.7, 0.5, I. setosa
|
38
|
-
4.8, 3.4, 1.9, 0.2, I. setosa
|
39
|
-
5.0, 3.0, 1.6, 0.2, I. setosa
|
40
|
-
5.0, 3.4, 1.6, 0.4, I. setosa
|
41
|
-
5.2, 3.5, 1.5, 0.2, I. setosa
|
42
|
-
5.2, 3.4, 1.4, 0.2, I. setosa
|
43
|
-
4.7, 3.2, 1.6, 0.2, I. setosa
|
44
|
-
4.8, 3.1, 1.6, 0.2, I. setosa
|
45
|
-
5.4, 3.4, 1.5, 0.4, I. setosa
|
46
|
-
5.2, 4.1, 1.5, 0.1, I. setosa
|
47
|
-
5.5, 4.2, 1.4, 0.2, I. setosa
|
48
|
-
4.9, 3.1, 1.5, 0.2, I. setosa
|
49
|
-
5.0, 3.2, 1.2, 0.2, I. setosa
|
50
|
-
5.5, 3.5, 1.3, 0.2, I. setosa
|
51
|
-
4.9, 3.6, 1.4, 0.1, I. setosa
|
52
|
-
4.4, 3.0, 1.3, 0.2, I. setosa
|
53
|
-
5.1, 3.4, 1.5, 0.2, I. setosa
|
54
|
-
5.0, 3.5, 1.3, 0.3, I. setosa
|
55
|
-
4.5, 2.3, 1.3, 0.3, I. setosa
|
56
|
-
4.4, 3.2, 1.3, 0.2, I. setosa
|
57
|
-
5.0, 3.5, 1.6, 0.6, I. setosa
|
58
|
-
5.1, 3.8, 1.9, 0.4, I. setosa
|
59
|
-
4.8, 3.0, 1.4, 0.3, I. setosa
|
60
|
-
5.1, 3.8, 1.6, 0.2, I. setosa
|
61
|
-
4.6, 3.2, 1.4, 0.2, I. setosa
|
62
|
-
5.3, 3.7, 1.5, 0.2, I. setosa
|
63
|
-
5.0, 3.3, 1.4, 0.2, I. setosa
|
64
|
-
7.0, 3.2, 4.7, 1.4, I. versicolor
|
65
|
-
6.4, 3.2, 4.5, 1.5, I. versicolor
|
66
|
-
6.9, 3.1, 4.9, 1.5, I. versicolor
|
67
|
-
5.5, 2.3, 4.0, 1.3, I. versicolor
|
68
|
-
6.5, 2.8, 4.6, 1.5, I. versicolor
|
69
|
-
5.7, 2.8, 4.5, 1.3, I. versicolor
|
70
|
-
6.3, 3.3, 4.7, 1.6, I. versicolor
|
71
|
-
4.9, 2.4, 3.3, 1.0, I. versicolor
|
72
|
-
6.6, 2.9, 4.6, 1.3, I. versicolor
|
73
|
-
5.2, 2.7, 3.9, 1.4, I. versicolor
|
74
|
-
5.0, 2.0, 3.5, 1.0, I. versicolor
|
75
|
-
5.9, 3.0, 4.2, 1.5, I. versicolor
|
76
|
-
6.0, 2.2, 4.0, 1.0, I. versicolor
|
77
|
-
6.1, 2.9, 4.7, 1.4, I. versicolor
|
78
|
-
5.6, 2.9, 3.6, 1.3, I. versicolor
|
79
|
-
6.7, 3.1, 4.4, 1.4, I. versicolor
|
80
|
-
5.6, 3.0, 4.5, 1.5, I. versicolor
|
81
|
-
5.8, 2.7, 4.1, 1.0, I. versicolor
|
82
|
-
6.2, 2.2, 4.5, 1.5, I. versicolor
|
83
|
-
5.6, 2.5, 3.9, 1.1, I. versicolor
|
84
|
-
5.9, 3.2, 4.8, 1.8, I. versicolor
|
85
|
-
6.1, 2.8, 4.0, 1.3, I. versicolor
|
86
|
-
6.3, 2.5, 4.9, 1.5, I. versicolor
|
87
|
-
6.1, 2.8, 4.7, 1.2, I. versicolor
|
88
|
-
6.4, 2.9, 4.3, 1.3, I. versicolor
|
89
|
-
6.6, 3.0, 4.4, 1.4, I. versicolor
|
90
|
-
6.8, 2.8, 4.8, 1.4, I. versicolor
|
91
|
-
6.7, 3.0, 5.0, 1.7, I. versicolor
|
92
|
-
6.0, 2.9, 4.5, 1.5, I. versicolor
|
93
|
-
5.7, 2.6, 3.5, 1.0, I. versicolor
|
94
|
-
5.5, 2.4, 3.8, 1.1, I. versicolor
|
95
|
-
5.5, 2.4, 3.7, 1.0, I. versicolor
|
96
|
-
5.8, 2.7, 3.9, 1.2, I. versicolor
|
97
|
-
6.0, 2.7, 5.1, 1.6, I. versicolor
|
98
|
-
5.4, 3.0, 4.5, 1.5, I. versicolor
|
99
|
-
6.0, 3.4, 4.5, 1.6, I. versicolor
|
100
|
-
6.7, 3.1, 4.7, 1.5, I. versicolor
|
101
|
-
6.3, 2.3, 4.4, 1.3, I. versicolor
|
102
|
-
5.6, 3.0, 4.1, 1.3, I. versicolor
|
103
|
-
5.5, 2.5, 4.0, 1.3, I. versicolor
|
104
|
-
5.5, 2.6, 4.4, 1.2, I. versicolor
|
105
|
-
6.1, 3.0, 4.6, 1.4, I. versicolor
|
106
|
-
5.8, 2.6, 4.0, 1.2, I. versicolor
|
107
|
-
5.0, 2.3, 3.3, 1.0, I. versicolor
|
108
|
-
5.6, 2.7, 4.2, 1.3, I. versicolor
|
109
|
-
5.7, 3.0, 4.2, 1.2, I. versicolor
|
110
|
-
5.7, 2.9, 4.2, 1.3, I. versicolor
|
111
|
-
6.2, 2.9, 4.3, 1.3, I. versicolor
|
112
|
-
5.1, 2.5, 3.0, 1.1, I. versicolor
|
113
|
-
5.7, 2.8, 4.1, 1.3, I. versicolor
|
114
|
-
6.3, 3.3, 6.0, 2.5, I. virginica
|
115
|
-
5.8, 2.7, 5.1, 1.9, I. virginica
|
116
|
-
7.1, 3.0, 5.9, 2.1, I. virginica
|
117
|
-
6.3, 2.9, 5.6, 1.8, I. virginica
|
118
|
-
6.5, 3.0, 5.8, 2.2, I. virginica
|
119
|
-
7.6, 3.0, 6.6, 2.1, I. virginica
|
120
|
-
4.9, 2.5, 4.5, 1.7, I. virginica
|
121
|
-
7.3, 2.9, 6.3, 1.8, I. virginica
|
122
|
-
6.7, 2.5, 5.8, 1.8, I. virginica
|
123
|
-
7.2, 3.6, 6.1, 2.5, I. virginica
|
124
|
-
6.5, 3.2, 5.1, 2.0, I. virginica
|
125
|
-
6.4, 2.7, 5.3, 1.9, I. virginica
|
126
|
-
6.8, 3.0, 5.5, 2.1, I. virginica
|
127
|
-
5.7, 2.5, 5.0, 2.0, I. virginica
|
128
|
-
5.8, 2.8, 5.1, 2.4, I. virginica
|
129
|
-
6.4, 3.2, 5.3, 2.3, I. virginica
|
130
|
-
6.5, 3.0, 5.5, 1.8, I. virginica
|
131
|
-
7.7, 3.8, 6.7, 2.2, I. virginica
|
132
|
-
7.7, 2.6, 6.9, 2.3, I. virginica
|
133
|
-
6.0, 2.2, 5.0, 1.5, I. virginica
|
134
|
-
6.9, 3.2, 5.7, 2.3, I. virginica
|
135
|
-
5.6, 2.8, 4.9, 2.0, I. virginica
|
136
|
-
7.7, 2.8, 6.7, 2.0, I. virginica
|
137
|
-
6.3, 2.7, 4.9, 1.8, I. virginica
|
138
|
-
6.7, 3.3, 5.7, 2.1, I. virginica
|
139
|
-
7.2, 3.2, 6.0, 1.8, I. virginica
|
140
|
-
6.2, 2.8, 4.8, 1.8, I. virginica
|
141
|
-
6.1, 3.0, 4.9, 1.8, I. virginica
|
142
|
-
6.4, 2.8, 5.6, 2.1, I. virginica
|
143
|
-
7.2, 3.0, 5.8, 1.6, I. virginica
|
144
|
-
7.4, 2.8, 6.1, 1.9, I. virginica
|
145
|
-
7.9, 3.8, 6.4, 2.0, I. virginica
|
146
|
-
6.4, 2.8, 5.6, 2.2, I. virginica
|
147
|
-
6.3, 2.8, 5.1, 1.5, I. virginica
|
148
|
-
6.1, 2.6, 5.6, 1.4, I. virginica
|
149
|
-
7.7, 3.0, 6.1, 2.3, I. virginica
|
150
|
-
6.3, 3.4, 5.6, 2.4, I. virginica
|
151
|
-
6.4, 3.1, 5.5, 1.8, I. virginica
|
152
|
-
6.0, 3.0, 4.8, 1.8, I. virginica
|
153
|
-
6.9, 3.1, 5.4, 2.1, I. virginica
|
154
|
-
6.7, 3.1, 5.6, 2.4, I. virginica
|
155
|
-
6.9, 3.1, 5.1, 2.3, I. virginica
|
156
|
-
5.8, 2.7, 5.1, 1.9, I. virginica
|
157
|
-
6.8, 3.2, 5.9, 2.3, I. virginica
|
158
|
-
6.7, 3.3, 5.7, 2.5, I. virginica
|
159
|
-
6.7, 3.0, 5.2, 2.3, I. virginica
|
160
|
-
6.3, 2.5, 5.0, 1.9, I. virginica
|
161
|
-
6.5, 3.0, 5.2, 2.0, I. virginica
|
162
|
-
6.2, 3.4, 5.4, 2.3, I. virginica
|
163
|
-
5.9, 3.0, 5.1, 1.8, I. virginica
|
1
|
+
###
|
2
|
+
# The Iris flower data set or Fisher's Iris data set is a multivariate data set
|
3
|
+
# introduced by the British statistician and biologist Ronald Fisher in his 1936 paper
|
4
|
+
# The use of multiple measurements in taxonomic problems as an example of
|
5
|
+
# linear discriminant analysis.
|
6
|
+
# See https://en.wikipedia.org/wiki/Iris_flower_data_set
|
7
|
+
|
8
|
+
|
9
|
+
# The dataset contains a set of 150 records under five attributes
|
10
|
+
# - petal length, petal width, sepal length, sepal width and species.
|
11
|
+
|
12
|
+
|
13
|
+
Sepal length, Sepal width, Petal length, Petal width, Species
|
14
|
+
5.1, 3.5, 1.4, 0.2, I. setosa
|
15
|
+
4.9, 3.0, 1.4, 0.2, I. setosa
|
16
|
+
4.7, 3.2, 1.3, 0.2, I. setosa
|
17
|
+
4.6, 3.1, 1.5, 0.2, I. setosa
|
18
|
+
5.0, 3.6, 1.4, 0.3, I. setosa
|
19
|
+
5.4, 3.9, 1.7, 0.4, I. setosa
|
20
|
+
4.6, 3.4, 1.4, 0.3, I. setosa
|
21
|
+
5.0, 3.4, 1.5, 0.2, I. setosa
|
22
|
+
4.4, 2.9, 1.4, 0.2, I. setosa
|
23
|
+
4.9, 3.1, 1.5, 0.1, I. setosa
|
24
|
+
5.4, 3.7, 1.5, 0.2, I. setosa
|
25
|
+
4.8, 3.4, 1.6, 0.2, I. setosa
|
26
|
+
4.8, 3.0, 1.4, 0.1, I. setosa
|
27
|
+
4.3, 3.0, 1.1, 0.1, I. setosa
|
28
|
+
5.8, 4.0, 1.2, 0.2, I. setosa
|
29
|
+
5.7, 4.4, 1.5, 0.4, I. setosa
|
30
|
+
5.4, 3.9, 1.3, 0.4, I. setosa
|
31
|
+
5.1, 3.5, 1.4, 0.3, I. setosa
|
32
|
+
5.7, 3.8, 1.7, 0.3, I. setosa
|
33
|
+
5.1, 3.8, 1.5, 0.3, I. setosa
|
34
|
+
5.4, 3.4, 1.7, 0.2, I. setosa
|
35
|
+
5.1, 3.7, 1.5, 0.4, I. setosa
|
36
|
+
4.6, 3.6, 1.0, 0.2, I. setosa
|
37
|
+
5.1, 3.3, 1.7, 0.5, I. setosa
|
38
|
+
4.8, 3.4, 1.9, 0.2, I. setosa
|
39
|
+
5.0, 3.0, 1.6, 0.2, I. setosa
|
40
|
+
5.0, 3.4, 1.6, 0.4, I. setosa
|
41
|
+
5.2, 3.5, 1.5, 0.2, I. setosa
|
42
|
+
5.2, 3.4, 1.4, 0.2, I. setosa
|
43
|
+
4.7, 3.2, 1.6, 0.2, I. setosa
|
44
|
+
4.8, 3.1, 1.6, 0.2, I. setosa
|
45
|
+
5.4, 3.4, 1.5, 0.4, I. setosa
|
46
|
+
5.2, 4.1, 1.5, 0.1, I. setosa
|
47
|
+
5.5, 4.2, 1.4, 0.2, I. setosa
|
48
|
+
4.9, 3.1, 1.5, 0.2, I. setosa
|
49
|
+
5.0, 3.2, 1.2, 0.2, I. setosa
|
50
|
+
5.5, 3.5, 1.3, 0.2, I. setosa
|
51
|
+
4.9, 3.6, 1.4, 0.1, I. setosa
|
52
|
+
4.4, 3.0, 1.3, 0.2, I. setosa
|
53
|
+
5.1, 3.4, 1.5, 0.2, I. setosa
|
54
|
+
5.0, 3.5, 1.3, 0.3, I. setosa
|
55
|
+
4.5, 2.3, 1.3, 0.3, I. setosa
|
56
|
+
4.4, 3.2, 1.3, 0.2, I. setosa
|
57
|
+
5.0, 3.5, 1.6, 0.6, I. setosa
|
58
|
+
5.1, 3.8, 1.9, 0.4, I. setosa
|
59
|
+
4.8, 3.0, 1.4, 0.3, I. setosa
|
60
|
+
5.1, 3.8, 1.6, 0.2, I. setosa
|
61
|
+
4.6, 3.2, 1.4, 0.2, I. setosa
|
62
|
+
5.3, 3.7, 1.5, 0.2, I. setosa
|
63
|
+
5.0, 3.3, 1.4, 0.2, I. setosa
|
64
|
+
7.0, 3.2, 4.7, 1.4, I. versicolor
|
65
|
+
6.4, 3.2, 4.5, 1.5, I. versicolor
|
66
|
+
6.9, 3.1, 4.9, 1.5, I. versicolor
|
67
|
+
5.5, 2.3, 4.0, 1.3, I. versicolor
|
68
|
+
6.5, 2.8, 4.6, 1.5, I. versicolor
|
69
|
+
5.7, 2.8, 4.5, 1.3, I. versicolor
|
70
|
+
6.3, 3.3, 4.7, 1.6, I. versicolor
|
71
|
+
4.9, 2.4, 3.3, 1.0, I. versicolor
|
72
|
+
6.6, 2.9, 4.6, 1.3, I. versicolor
|
73
|
+
5.2, 2.7, 3.9, 1.4, I. versicolor
|
74
|
+
5.0, 2.0, 3.5, 1.0, I. versicolor
|
75
|
+
5.9, 3.0, 4.2, 1.5, I. versicolor
|
76
|
+
6.0, 2.2, 4.0, 1.0, I. versicolor
|
77
|
+
6.1, 2.9, 4.7, 1.4, I. versicolor
|
78
|
+
5.6, 2.9, 3.6, 1.3, I. versicolor
|
79
|
+
6.7, 3.1, 4.4, 1.4, I. versicolor
|
80
|
+
5.6, 3.0, 4.5, 1.5, I. versicolor
|
81
|
+
5.8, 2.7, 4.1, 1.0, I. versicolor
|
82
|
+
6.2, 2.2, 4.5, 1.5, I. versicolor
|
83
|
+
5.6, 2.5, 3.9, 1.1, I. versicolor
|
84
|
+
5.9, 3.2, 4.8, 1.8, I. versicolor
|
85
|
+
6.1, 2.8, 4.0, 1.3, I. versicolor
|
86
|
+
6.3, 2.5, 4.9, 1.5, I. versicolor
|
87
|
+
6.1, 2.8, 4.7, 1.2, I. versicolor
|
88
|
+
6.4, 2.9, 4.3, 1.3, I. versicolor
|
89
|
+
6.6, 3.0, 4.4, 1.4, I. versicolor
|
90
|
+
6.8, 2.8, 4.8, 1.4, I. versicolor
|
91
|
+
6.7, 3.0, 5.0, 1.7, I. versicolor
|
92
|
+
6.0, 2.9, 4.5, 1.5, I. versicolor
|
93
|
+
5.7, 2.6, 3.5, 1.0, I. versicolor
|
94
|
+
5.5, 2.4, 3.8, 1.1, I. versicolor
|
95
|
+
5.5, 2.4, 3.7, 1.0, I. versicolor
|
96
|
+
5.8, 2.7, 3.9, 1.2, I. versicolor
|
97
|
+
6.0, 2.7, 5.1, 1.6, I. versicolor
|
98
|
+
5.4, 3.0, 4.5, 1.5, I. versicolor
|
99
|
+
6.0, 3.4, 4.5, 1.6, I. versicolor
|
100
|
+
6.7, 3.1, 4.7, 1.5, I. versicolor
|
101
|
+
6.3, 2.3, 4.4, 1.3, I. versicolor
|
102
|
+
5.6, 3.0, 4.1, 1.3, I. versicolor
|
103
|
+
5.5, 2.5, 4.0, 1.3, I. versicolor
|
104
|
+
5.5, 2.6, 4.4, 1.2, I. versicolor
|
105
|
+
6.1, 3.0, 4.6, 1.4, I. versicolor
|
106
|
+
5.8, 2.6, 4.0, 1.2, I. versicolor
|
107
|
+
5.0, 2.3, 3.3, 1.0, I. versicolor
|
108
|
+
5.6, 2.7, 4.2, 1.3, I. versicolor
|
109
|
+
5.7, 3.0, 4.2, 1.2, I. versicolor
|
110
|
+
5.7, 2.9, 4.2, 1.3, I. versicolor
|
111
|
+
6.2, 2.9, 4.3, 1.3, I. versicolor
|
112
|
+
5.1, 2.5, 3.0, 1.1, I. versicolor
|
113
|
+
5.7, 2.8, 4.1, 1.3, I. versicolor
|
114
|
+
6.3, 3.3, 6.0, 2.5, I. virginica
|
115
|
+
5.8, 2.7, 5.1, 1.9, I. virginica
|
116
|
+
7.1, 3.0, 5.9, 2.1, I. virginica
|
117
|
+
6.3, 2.9, 5.6, 1.8, I. virginica
|
118
|
+
6.5, 3.0, 5.8, 2.2, I. virginica
|
119
|
+
7.6, 3.0, 6.6, 2.1, I. virginica
|
120
|
+
4.9, 2.5, 4.5, 1.7, I. virginica
|
121
|
+
7.3, 2.9, 6.3, 1.8, I. virginica
|
122
|
+
6.7, 2.5, 5.8, 1.8, I. virginica
|
123
|
+
7.2, 3.6, 6.1, 2.5, I. virginica
|
124
|
+
6.5, 3.2, 5.1, 2.0, I. virginica
|
125
|
+
6.4, 2.7, 5.3, 1.9, I. virginica
|
126
|
+
6.8, 3.0, 5.5, 2.1, I. virginica
|
127
|
+
5.7, 2.5, 5.0, 2.0, I. virginica
|
128
|
+
5.8, 2.8, 5.1, 2.4, I. virginica
|
129
|
+
6.4, 3.2, 5.3, 2.3, I. virginica
|
130
|
+
6.5, 3.0, 5.5, 1.8, I. virginica
|
131
|
+
7.7, 3.8, 6.7, 2.2, I. virginica
|
132
|
+
7.7, 2.6, 6.9, 2.3, I. virginica
|
133
|
+
6.0, 2.2, 5.0, 1.5, I. virginica
|
134
|
+
6.9, 3.2, 5.7, 2.3, I. virginica
|
135
|
+
5.6, 2.8, 4.9, 2.0, I. virginica
|
136
|
+
7.7, 2.8, 6.7, 2.0, I. virginica
|
137
|
+
6.3, 2.7, 4.9, 1.8, I. virginica
|
138
|
+
6.7, 3.3, 5.7, 2.1, I. virginica
|
139
|
+
7.2, 3.2, 6.0, 1.8, I. virginica
|
140
|
+
6.2, 2.8, 4.8, 1.8, I. virginica
|
141
|
+
6.1, 3.0, 4.9, 1.8, I. virginica
|
142
|
+
6.4, 2.8, 5.6, 2.1, I. virginica
|
143
|
+
7.2, 3.0, 5.8, 1.6, I. virginica
|
144
|
+
7.4, 2.8, 6.1, 1.9, I. virginica
|
145
|
+
7.9, 3.8, 6.4, 2.0, I. virginica
|
146
|
+
6.4, 2.8, 5.6, 2.2, I. virginica
|
147
|
+
6.3, 2.8, 5.1, 1.5, I. virginica
|
148
|
+
6.1, 2.6, 5.6, 1.4, I. virginica
|
149
|
+
7.7, 3.0, 6.1, 2.3, I. virginica
|
150
|
+
6.3, 3.4, 5.6, 2.4, I. virginica
|
151
|
+
6.4, 3.1, 5.5, 1.8, I. virginica
|
152
|
+
6.0, 3.0, 4.8, 1.8, I. virginica
|
153
|
+
6.9, 3.1, 5.4, 2.1, I. virginica
|
154
|
+
6.7, 3.1, 5.6, 2.4, I. virginica
|
155
|
+
6.9, 3.1, 5.1, 2.3, I. virginica
|
156
|
+
5.8, 2.7, 5.1, 1.9, I. virginica
|
157
|
+
6.8, 3.2, 5.9, 2.3, I. virginica
|
158
|
+
6.7, 3.3, 5.7, 2.5, I. virginica
|
159
|
+
6.7, 3.0, 5.2, 2.3, I. virginica
|
160
|
+
6.3, 2.5, 5.0, 1.9, I. virginica
|
161
|
+
6.5, 3.0, 5.2, 2.0, I. virginica
|
162
|
+
6.2, 3.4, 5.4, 2.3, I. virginica
|
163
|
+
5.9, 3.0, 5.1, 1.8, I. virginica
|
data/datasets/lcc.attrib.csv
CHANGED
@@ -1,14 +1,14 @@
|
|
1
|
-
% Attribute-Relation File Format (ARFF) Example
|
2
|
-
% see https://www.cs.waikato.ac.nz/ml/weka/arff.html
|
3
|
-
|
4
|
-
@relation LCCvsLCSH
|
5
|
-
|
6
|
-
@attribute LCC string
|
7
|
-
@attribute LCSH string
|
8
|
-
|
9
|
-
@data
|
10
|
-
AG5, 'Encyclopedias and dictionaries.;Twentieth century.'
|
11
|
-
AS262, 'Science -- Soviet Union -- History.'
|
12
|
-
AE5, 'Encyclopedias and dictionaries.'
|
13
|
-
AS281, 'Astronomy, Assyro-Babylonian.;Moon -- Phases.'
|
14
|
-
AS281, 'Astronomy, Assyro-Babylonian.;Moon -- Tables.'
|
1
|
+
% Attribute-Relation File Format (ARFF) Example
|
2
|
+
% see https://www.cs.waikato.ac.nz/ml/weka/arff.html
|
3
|
+
|
4
|
+
@relation LCCvsLCSH
|
5
|
+
|
6
|
+
@attribute LCC string
|
7
|
+
@attribute LCSH string
|
8
|
+
|
9
|
+
@data
|
10
|
+
AG5, 'Encyclopedias and dictionaries.;Twentieth century.'
|
11
|
+
AS262, 'Science -- Soviet Union -- History.'
|
12
|
+
AE5, 'Encyclopedias and dictionaries.'
|
13
|
+
AS281, 'Astronomy, Assyro-Babylonian.;Moon -- Phases.'
|
14
|
+
AS281, 'Astronomy, Assyro-Babylonian.;Moon -- Tables.'
|
data/datasets/shakespeare.csv
CHANGED
@@ -1,9 +1,9 @@
|
|
1
|
-
Quote,Play,Cite
|
2
|
-
Sweet are the uses of adversity,As You Like It,"Act 2, scene 1, 12"
|
3
|
-
All the world's a stage,As You Like It,"Act 2, scene 7, 139"
|
4
|
-
"We few, we happy few",Henry V,
|
5
|
-
"""Seems,"" madam! Nay it is; I know not ""seems.""",Hamlet,(1.ii.76)
|
6
|
-
"To be, or not to be",Hamlet,"Act 3, scene 1, 55"
|
7
|
-
What's in a name? That which we call a rose by any other name would smell as sweet.,Romeo and Juliet,"(II, ii, 1-2)"
|
8
|
-
"O Romeo, Romeo, wherefore art thou Romeo?",Romeo and Juliet,"Act 2, scene 2, 33"
|
9
|
-
"Tomorrow, and tomorrow, and tomorrow",Macbeth,"Act 5, scene 5, 19"
|
1
|
+
Quote,Play,Cite
|
2
|
+
Sweet are the uses of adversity,As You Like It,"Act 2, scene 1, 12"
|
3
|
+
All the world's a stage,As You Like It,"Act 2, scene 7, 139"
|
4
|
+
"We few, we happy few",Henry V,
|
5
|
+
"""Seems,"" madam! Nay it is; I know not ""seems.""",Hamlet,(1.ii.76)
|
6
|
+
"To be, or not to be",Hamlet,"Act 3, scene 1, 55"
|
7
|
+
What's in a name? That which we call a rose by any other name would smell as sweet.,Romeo and Juliet,"(II, ii, 1-2)"
|
8
|
+
"O Romeo, Romeo, wherefore art thou Romeo?",Romeo and Juliet,"Act 2, scene 2, 33"
|
9
|
+
"Tomorrow, and tomorrow, and tomorrow",Macbeth,"Act 5, scene 5, 19"
|
data/lib/csvreader/base.rb
CHANGED
@@ -1,14 +1,18 @@
|
|
1
|
-
# encoding: utf-8
|
2
1
|
|
3
2
|
|
4
3
|
require 'pp'
|
5
|
-
require 'logger'
|
4
|
+
require 'logger' ## todo/fix: check why logger is required - use logutils!!!???
|
6
5
|
require 'forwardable'
|
7
6
|
require 'stringio'
|
7
|
+
require 'fileutils'
|
8
|
+
|
9
|
+
require 'time'
|
8
10
|
require 'date' ## use for Date.parse and DateTime.parse
|
9
11
|
require 'yaml' ## used for (optional) meta data blocks
|
12
|
+
require 'json'
|
10
13
|
|
11
14
|
|
15
|
+
## our own parser libs
|
12
16
|
require 'tabreader'
|
13
17
|
require 'csvjson'
|
14
18
|
require 'csvyaml'
|
data/lib/csvreader/buffer.rb
CHANGED
data/lib/csvreader/builder.rb
CHANGED
data/lib/csvreader/converter.rb
CHANGED