to-arff 0.1.1 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +56 -19
- data/lib/to-arff.rb +2 -1
- data/lib/to-arff/sqlitedb.rb +17 -8
- data/lib/to-arff/version.rb +1 -1
- data/to-arff.gemspec +1 -0
- metadata +16 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA1:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 507fda68512fc420f96579514e00264c4e4ac0dd
|
|
4
|
+
data.tar.gz: 21c267d6299d777d259c1fa55e9157e10103e38c
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: eaf66b84e11d9bb215ce2c30b1793117e22bbf2e822e91fc86c5a15bdf0fee4716e515baa96a3d5af7fd4a4a7e8d70f5801198227d7a721d1d112e0762f6b7bb
|
|
7
|
+
data.tar.gz: 7a24e6157ce6bcb21b9f9fd2441bcb5742ada88cbc0c224ffc110806d71341e608a1182e37376932fdb49020da86f3ec7e4c86d1048ad1f091e03a43d9fc5b77
|
data/README.md
CHANGED
|
@@ -1,9 +1,12 @@
|
|
|
1
1
|
# ToARFF
|
|
2
|
+

|
|
3
|
+
|
|
2
4
|
[](https://travis-ci.org/dhrubomoy/to-arff)
|
|
3
5
|
[](https://coveralls.io/github/dhrubomoy/to-arff)
|
|
4
6
|
[](https://badge.fury.io/rb/to-arff)
|
|
5
7
|
[](https://gemnasium.com/github.com/dhrubomoy/to-arff)
|
|
6
8
|
[](https://codeclimate.com/github/dhrubomoy/to-arff)
|
|
9
|
+
|
|
7
10
|
## Table of Contents
|
|
8
11
|
- [About](#about)
|
|
9
12
|
- [What is an ARFF File](#what-is-an-arff-file)
|
|
@@ -18,7 +21,7 @@ ToARFF is a ruby library to convert SQLite database files to ARFF files (Attribu
|
|
|
18
21
|
|
|
19
22
|
### What is an ARFF File:
|
|
20
23
|
[This wiki](http://weka.wikispaces.com/ARFF+%28book+version%29) describes it perfectly:
|
|
21
|
-
"An ARFF (Attribute-Relation File Format) file is an ASCII text file that describes a list of instances sharing a set of attributes. ARFF files were developed by the Machine Learning Project at the Department of Computer Science of The University of Waikato for use with the Weka machine learning software."
|
|
24
|
+
> "An ARFF (Attribute-Relation File Format) file is an ASCII text file that describes a list of instances sharing a set of attributes. ARFF files were developed by the Machine Learning Project at the Department of Computer Science of The University of Waikato for use with the Weka machine learning software."
|
|
22
25
|
|
|
23
26
|
**Note:** Converting from an SQLite database will generate one ARFF file per table. See [this stackoverflow post](http://stackoverflow.com/questions/37009995/weka-machine-learning-arff-file-multiple-relations).
|
|
24
27
|
|
|
@@ -42,30 +45,52 @@ Or install it yourself as:
|
|
|
42
45
|
|
|
43
46
|
### Convert from an SQLite Database
|
|
44
47
|
#### By Specifying Column Types (Recommended)
|
|
45
|
-
|
|
48
|
+
Use the `convert` method and specify the column/attribute types as JSON-style key/value pairs (or as a nested hash).
|
|
46
49
|
```ruby
|
|
47
50
|
require 'to-arff'
|
|
48
|
-
#
|
|
49
|
-
sample = ToARFF::SQLiteDB.new "/path/to/
|
|
51
|
+
# Get the db file from https://github.com/dhrubomoy/to-arff/blob/master/spec/sample_db_files/sample2.db
|
|
52
|
+
sample = ToARFF::SQLiteDB.new "/path/to/sample2.db"
|
|
50
53
|
# Attribute names and types must be valid
|
|
51
|
-
# eg. { "table1"
|
|
54
|
+
# eg. { "table1": {"column11"=>"NUMERIC",
|
|
55
|
+
# "column12"=>"STRING"
|
|
56
|
+
# },
|
|
57
|
+
# "table2": {"column21"=>"class {Iris-setosa,Iris-versicolor,Iris-virginica}",
|
|
58
|
+
# "column22"=>"DATE \"yyyy-MM-dd HH:mm:ss\""
|
|
59
|
+
# }
|
|
60
|
+
# }
|
|
61
|
+
# OR { "table1" => {"column11"=>"NUMERIC",
|
|
52
62
|
# "column12"=>"STRING"
|
|
53
63
|
# },
|
|
54
64
|
# "table2" => {"column21"=>"class {Iris-setosa,Iris-versicolor,Iris-virginica}",
|
|
55
65
|
# "column22"=>"DATE \"yyyy-MM-dd HH:mm:ss\""
|
|
56
66
|
# }
|
|
57
|
-
|
|
67
|
+
# }
|
|
68
|
+
sample_column_types_param_json = {
|
|
69
|
+
"albums": {
|
|
70
|
+
"Albumid": "NUMERIC",
|
|
71
|
+
"Title": "STRING"
|
|
72
|
+
},
|
|
73
|
+
"employees": {
|
|
74
|
+
"EmployeeId": "NUMERIC",
|
|
75
|
+
"LastName": "STRING",
|
|
76
|
+
"City": "STRING",
|
|
77
|
+
"HireDate": "DATE 'yyyy-MM-dd HH:mm:ss'"
|
|
78
|
+
}
|
|
79
|
+
}
|
|
80
|
+
sample_column_types_param_hash = { "employees" => {"EmployeeId"=>"NUMERIC",
|
|
58
81
|
"LastName"=>"STRING",
|
|
59
82
|
"City"=>"STRING",
|
|
60
83
|
"HireDate"=>"DATE \"yyyy-MM-dd HH:mm:ss\""
|
|
61
84
|
},
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
puts sample.convert column_types:
|
|
85
|
+
"albums" => { "Albumid"=>"NUMERIC",
|
|
86
|
+
"Title"=>"STRING"
|
|
87
|
+
}
|
|
88
|
+
}
|
|
89
|
+
puts sample.convert column_types: sample_column_types_param_json
|
|
90
|
+
#OR
|
|
91
|
+
puts sample.convert column_types: sample_column_types_param_hash
|
|
67
92
|
```
|
|
68
|
-
|
|
93
|
+
Both will produce a string similar to the following:
|
|
69
94
|
```
|
|
70
95
|
@RELATION employees
|
|
71
96
|
|
|
@@ -96,12 +121,24 @@ We will get something similar to following:
|
|
|
96
121
|
```ruby
|
|
97
122
|
require 'to-arff'
|
|
98
123
|
sample = ToARFF::SQLiteDB.new "/path/to/sample_sqlite.db"
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
124
|
+
# Column names must be specified like this:
|
|
125
|
+
# { "table1" => ["column11", "column12",...],
|
|
126
|
+
# "table2" => ["column21", "column22",...]
|
|
127
|
+
# }
|
|
128
|
+
# OR
|
|
129
|
+
# { "table1": ["column11", "column12",...],
|
|
130
|
+
# "table2": ["column21", "column22",...]
|
|
131
|
+
# }
|
|
132
|
+
sample_columns_json = { "albums": ["AlbumId", "Title", "ArtistId"],
|
|
133
|
+
"employees": ["EmployeeId", "LastName", "FirstName", "Title"]
|
|
134
|
+
}
|
|
135
|
+
sample_columns_hash = { "albums" => ["AlbumId", "Title", "ArtistId"],
|
|
136
|
+
"employees" => ["EmployeeId", "LastName", "FirstName", "Title"]
|
|
137
|
+
}
|
|
138
|
+
puts sample.convert columns: sample_columns_json
|
|
139
|
+
puts sample.convert columns: sample_columns_hash
|
|
103
140
|
```
|
|
104
|
-
|
|
141
|
+
Both the JSON-style and hash parameters for `columns:` will return a string similar to the following:
|
|
105
142
|
```
|
|
106
143
|
@RELATION albums
|
|
107
144
|
|
|
@@ -130,7 +167,7 @@ We will get something similar:
|
|
|
130
167
|
```
|
|
131
168
|
As you can see, the "HireDate" attribute didn't get the correct datatype. It should be `DATE "yyyy-MM-dd HH:mm:ss"`, not `STRING`.
|
|
132
169
|
|
|
133
|
-
|
|
170
|
+
###### You can also do the following, but it might not generate the correct datatypes
|
|
134
171
|
```ruby
|
|
135
172
|
require 'to-arff'
|
|
136
173
|
sample = ToARFF::SQLiteDB.new "/path/to/sample_sqlite.db"
|
|
@@ -146,7 +183,7 @@ Bug reports and pull requests are welcome on GitHub at https://github.com/dhrubo
|
|
|
146
183
|
1. Fork it ( https://github.com/dhrubomoy/to-arff/fork )
|
|
147
184
|
2. Create branch (`git checkout -b my-new-feature`)
|
|
148
185
|
3. Make changes. Add test cases for your changes
|
|
149
|
-
4. Run `
|
|
186
|
+
4. Run `rake spec/` and make sure all the tests pass
|
|
150
187
|
5. Commit your changes (`git commit -am 'Add some feature'`)
|
|
151
188
|
6. Push to the branch (`git push origin my-new-feature`)
|
|
152
189
|
7. Create new Pull Request
|
data/lib/to-arff.rb
CHANGED
data/lib/to-arff/sqlitedb.rb
CHANGED
|
@@ -1,8 +1,9 @@
|
|
|
1
1
|
require 'to-arff/version'
|
|
2
2
|
require 'sqlite3'
|
|
3
|
+
require 'json'
|
|
3
4
|
|
|
4
5
|
module ToARFF
|
|
5
|
-
|
|
6
|
+
RELATION_MARKER = '@RELATION'.freeze
|
|
6
7
|
ATTRIBUTE_MARKER = '@ATTRIBUTE'.freeze
|
|
7
8
|
DATA_MARKER = '@DATA'.freeze
|
|
8
9
|
ATTRIBUTE_TYPE_NUMERIC = 'NUMERIC'.freeze
|
|
@@ -12,7 +13,7 @@ module ToARFF
|
|
|
12
13
|
attr_accessor :db_file_path, :db, :tables, :columns, :column_type
|
|
13
14
|
|
|
14
15
|
def initialize(path)
|
|
15
|
-
|
|
16
|
+
@db_file_path = path
|
|
16
17
|
@tables = []
|
|
17
18
|
@columns = {}
|
|
18
19
|
@column_type = {}
|
|
@@ -94,16 +95,16 @@ module ToARFF
|
|
|
94
95
|
end
|
|
95
96
|
|
|
96
97
|
def write_data(table_name, columns)
|
|
97
|
-
rel =
|
|
98
|
-
columns_str =
|
|
98
|
+
rel = ''
|
|
99
|
+
columns_str = ''
|
|
99
100
|
columns.each do |col|
|
|
100
|
-
columns_str += col +
|
|
101
|
+
columns_str += col + ', '
|
|
101
102
|
end
|
|
102
|
-
columns_str = columns_str.chomp(
|
|
103
|
+
columns_str = columns_str.chomp(', ')
|
|
103
104
|
rel << "\n#{DATA_MARKER}\n"
|
|
104
105
|
data = @db.prepare "SELECT #{columns_str} FROM #{table_name}"
|
|
105
106
|
data.each do |elem|
|
|
106
|
-
row =
|
|
107
|
+
row = ''
|
|
107
108
|
elem.each do |val|
|
|
108
109
|
if val.is_a? Numeric
|
|
109
110
|
row = row + "#{val}" + ","
|
|
@@ -191,6 +192,14 @@ module ToARFF
|
|
|
191
192
|
end
|
|
192
193
|
end
|
|
193
194
|
|
|
195
|
+
def stringify_all_keys(hash)
|
|
196
|
+
stringified_hash = {}
|
|
197
|
+
hash.each do |k, v|
|
|
198
|
+
stringified_hash[k.to_s] = v.is_a?(Hash) ? stringify_all_keys(v) : v
|
|
199
|
+
end
|
|
200
|
+
stringified_hash
|
|
201
|
+
end
|
|
202
|
+
|
|
194
203
|
def convert(options={})
|
|
195
204
|
temp_tables = options.fetch(:tables, Array.new)
|
|
196
205
|
temp_columns = options.fetch(:columns, Hash.new)
|
|
@@ -205,7 +214,7 @@ module ToARFF
|
|
|
205
214
|
if valid_option_given(options)
|
|
206
215
|
raise ArgumentError.new("Wrong parameter name \":#{options.keys.first}\"")
|
|
207
216
|
else
|
|
208
|
-
deal_with_valid_option(temp_tables, temp_columns, temp_column_types, res)
|
|
217
|
+
deal_with_valid_option(temp_tables, stringify_all_keys(temp_columns), stringify_all_keys(temp_column_types), res)
|
|
209
218
|
end
|
|
210
219
|
elsif param_count > 1
|
|
211
220
|
raise ArgumentError.new("You can specify only one out of the three parameters: table, columns, column_types.")
|
data/lib/to-arff/version.rb
CHANGED
data/to-arff.gemspec
CHANGED
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: to-arff
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.1.
|
|
4
|
+
version: 0.1.2
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- dhrubo_moy
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: exe
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2016-08-
|
|
11
|
+
date: 2016-08-31 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: bundler
|
|
@@ -52,6 +52,20 @@ dependencies:
|
|
|
52
52
|
- - "~>"
|
|
53
53
|
- !ruby/object:Gem::Version
|
|
54
54
|
version: '1.3'
|
|
55
|
+
- !ruby/object:Gem::Dependency
|
|
56
|
+
name: json
|
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
|
58
|
+
requirements:
|
|
59
|
+
- - ">="
|
|
60
|
+
- !ruby/object:Gem::Version
|
|
61
|
+
version: '0'
|
|
62
|
+
type: :development
|
|
63
|
+
prerelease: false
|
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
65
|
+
requirements:
|
|
66
|
+
- - ">="
|
|
67
|
+
- !ruby/object:Gem::Version
|
|
68
|
+
version: '0'
|
|
55
69
|
description:
|
|
56
70
|
email:
|
|
57
71
|
- dhrubo_moy@yahoo.com
|