data_kit 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +18 -0
- data/.rspec +2 -0
- data/.travis.yml +7 -0
- data/Gemfile +6 -0
- data/Gemfile.lock +44 -0
- data/LICENSE +20 -0
- data/README.md +22 -0
- data/Rakefile +6 -0
- data/data_kit.gemspec +31 -0
- data/lib/data_kit/converters/boolean.rb +16 -0
- data/lib/data_kit/converters/date_time.rb +21 -0
- data/lib/data_kit/converters/integer.rb +24 -0
- data/lib/data_kit/converters/number.rb +28 -0
- data/lib/data_kit/csv/analysis.rb +69 -0
- data/lib/data_kit/csv/analyzer.rb +52 -0
- data/lib/data_kit/csv/converter.rb +62 -0
- data/lib/data_kit/csv/parser.rb +55 -0
- data/lib/data_kit/dataset/field.rb +58 -0
- data/lib/data_kit/dataset/schema.rb +21 -0
- data/lib/data_kit/patches/rcsv.rb +121 -0
- data/lib/data_kit/version.rb +3 -0
- data/lib/data_kit.rb +20 -0
- data/spec/converters/boolean_spec.rb +18 -0
- data/spec/converters/date_time_spec.rb +30 -0
- data/spec/converters/integer_spec.rb +20 -0
- data/spec/converters/number_spec.rb +20 -0
- data/spec/csv/analysis_spec.rb +55 -0
- data/spec/csv/analyzer_spec.rb +56 -0
- data/spec/csv/converter_spec.rb +35 -0
- data/spec/csv/parser_spec.rb +50 -0
- data/spec/dataset/field_spec.rb +95 -0
- data/spec/dataset/schema_spec.rb +22 -0
- data/spec/fixtures/carriage_returns.csv +1 -0
- data/spec/fixtures/standard.csv +11 -0
- data/spec/spec_helper.rb +18 -0
- metadata +193 -0
@@ -0,0 +1,121 @@
|
|
1
|
+
require "rcsv/rcsv"
|
2
|
+
require "rcsv/version"
|
3
|
+
|
4
|
+
require "stringio"
|
5
|
+
|
6
|
+
#
|
7
|
+
# This is a temporary monkey patch to Rcsv.parse
|
8
|
+
# to silence warnings in Ruby 2 about #lines being deprecated
|
9
|
+
#
|
10
|
+
|
11
|
+
class Rcsv
  # Replacement for Rcsv.parse that reads the header row with #each_line
  # rather than the deprecated #lines.
  #
  # csv_data - a String, or an IO-like object. IO-like objects must respond
  #            to #each_line and #read; #pos and #pos= are also called on
  #            them so the stream can be rewound after the header is peeked.
  # options  - Hash of options read by this method: :header (:use, :skip or
  #            :none; :use when absent), :column_separator, :offset_rows,
  #            :nostrict, :parse_empty_fields_as, :buffer_size, :row_as_hash,
  #            :columns and :only_listed_columns. The hash is not modified.
  # block    - forwarded unchanged to Rcsv.raw_parse.
  #
  # Returns whatever Rcsv.raw_parse returns for the translated options.
  # Raises ParseError when csv_data is neither a String nor IO-like, and
  # RuntimeError when a column declares an unknown :type.
  def self.parse(csv_data, options = {}, &block)
    # Resolved into a local so the caller's options hash is left untouched.
    header_mode = options[:header] || :use

    raw_options = {}
    # Only the first character of the configured separator is used.
    raw_options[:col_sep] = options[:column_separator] && options[:column_separator][0] || ','
    raw_options[:offset_rows] = options[:offset_rows] || 0
    raw_options[:nostrict] = options[:nostrict]
    raw_options[:parse_empty_fields_as] = options[:parse_empty_fields_as]
    raw_options[:buffer_size] = options[:buffer_size] || 1024 * 1024 # 1 MiB

    if csv_data.is_a?(String)
      csv_data = StringIO.new(csv_data)
    elsif !(csv_data.respond_to?(:each_line) && csv_data.respond_to?(:read))
      # The check mirrors the methods actually called below (#each_line, not
      # #lines, which IO objects no longer provide on Ruby 3.0+).
      inspected_csv_data = csv_data.inspect
      raise ParseError, "Supplied CSV object #{inspected_csv_data[0..127]}#{inspected_csv_data.size > 128 ? '...' : ''} is neither String nor looks like IO object."
    end

    if csv_data.respond_to?(:external_encoding)
      raw_options[:output_encoding] = csv_data.external_encoding.to_s
    end

    # Peeking at the first line moves the stream; remember where to rewind to.
    initial_position = csv_data.pos

    case header_mode
    when :use
      header = raw_parse(StringIO.new(csv_data.each_line.first), raw_options).first
      raw_options[:offset_rows] += 1
    when :skip
      # NOTE(review): the inclusive range yields one more index than there are
      # fields on the first line; kept as-is because raw_parse's expectations
      # are not visible here -- confirm whether 0...count was intended.
      header = (0..csv_data.each_line.first.split(raw_options[:col_sep]).count).to_a
      raw_options[:offset_rows] += 1
    when :none
      header = (0..csv_data.each_line.first.split(raw_options[:col_sep]).count).to_a
    end

    raw_options[:row_as_hash] = options[:row_as_hash] # Setting after header parsing

    if options[:columns]
      only_rows = []
      except_rows = []
      row_defaults = []
      column_names = []
      row_conversions = ''

      # Filters are passed on as arrays; nil means "no filter for this column".
      as_filter = lambda { |value| value.nil? || value.is_a?(Array) ? value : [value] }
      # One conversion code per column; a missing :type means string.
      conversion_codes = { :int => 'i', :float => 'f', :string => 's', :bool => 'b', nil => 's' }

      header.each do |column_header|
        column_options = options[:columns][column_header]

        if column_options
          column_names << (column_options[:alias] || column_header) if options[:row_as_hash]
          row_defaults << column_options[:default]
          only_rows << as_filter.call(column_options[:match])
          except_rows << as_filter.call(column_options[:not_match])

          type = column_options[:type]
          row_conversions << conversion_codes.fetch(type) { fail "Unknown column type #{type.inspect}." }
        elsif options[:only_listed_columns]
          # Unlisted column: a blank conversion code and nil placeholders.
          column_names << nil
          row_defaults << nil
          only_rows << nil
          except_rows << nil
          row_conversions << ' '
        else
          column_names << column_header
          row_defaults << nil
          only_rows << nil
          except_rows << nil
          row_conversions << 's'
        end
      end

      raw_options[:column_names] = column_names if options[:row_as_hash]
      raw_options[:only_rows] = only_rows unless only_rows.compact.empty?
      raw_options[:except_rows] = except_rows unless except_rows.compact.empty?
      raw_options[:row_defaults] = row_defaults unless row_defaults.compact.empty?
      raw_options[:row_conversions] = row_conversions
    end

    # Rewind so the real parse starts from where the caller handed us the data.
    csv_data.pos = initial_position
    return raw_parse(csv_data, raw_options, &block)
  end
end
|
data/lib/data_kit.rb
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
require "data_kit/version"
|
2
|
+
|
3
|
+
# Data I/O
|
4
|
+
require 'data_kit/csv/parser'
|
5
|
+
require 'data_kit/csv/analyzer'
|
6
|
+
require 'data_kit/csv/analysis'
|
7
|
+
require 'data_kit/csv/converter'
|
8
|
+
|
9
|
+
# Data Conversion
|
10
|
+
require 'data_kit/converters/number'
|
11
|
+
require 'data_kit/converters/integer'
|
12
|
+
require 'data_kit/converters/boolean'
|
13
|
+
require 'data_kit/converters/date_time'
|
14
|
+
|
15
|
+
# Datasets
|
16
|
+
require 'data_kit/dataset/field'
|
17
|
+
require 'data_kit/dataset/schema'
|
18
|
+
|
19
|
+
# Patches / Fixes
|
20
|
+
require 'data_kit/patches/rcsv'
|
@@ -0,0 +1,18 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe DataKit::Converters::Boolean do
  # Each accepted boolean spelling must be reported as a match.
  it "should match values" do
    %w[true t false f].each do |literal|
      DataKit::Converters::Boolean.match?(literal).should == true
    end
  end

  # Each accepted spelling must convert to the corresponding Ruby boolean.
  it "should convert value it can match" do
    expectations = { "true" => true, "t" => true, "false" => false, "f" => false }

    expectations.each_pair do |literal, expected|
      DataKit::Converters::Boolean.convert(literal).should == expected
    end
  end
end
|
@@ -0,0 +1,30 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe DataKit::Converters::DateTime do
  # Date and date/time spellings the converter is expected to recognise.
  it "should match a date" do
    samples = [
      "1/1/00",
      "2010-01-01",
      "2010-01-01 12:00:00",
      "2000-01-01T00:00:00Z",
      "2000-01-01T00:00:00+00:00",
      "10/16/10 18:24",
      "10/16/10 1:24:15"
    ]

    samples.each do |sample|
      DataKit::Converters::DateTime.match?(sample).should == true
    end
  end

  # Input string => expected value, compared after formatting with strftime.
  it "should convert dates it can match" do
    layout = "%Y-%m-%d %H:%M:%S"
    expectations = {
      '1/1/00' => "2000-01-01 00:00:00",
      '2010-01-01' => '2010-01-01 00:00:00',
      '2010-01-01 12:00:00' => '2010-01-01 12:00:00',
      '2000-01-01T00:00:00' => '2000-01-01 00:00:00',
      '2000-01-01T00:00:00Z' => '2000-01-01 00:00:00',
      '2000-02-01T00:00:00+00:00' => '2000-02-01 00:00:00',
      '10/1/2012 10:27:45.000000 AM' => '2012-10-01 10:27:45',
      '10/1/2012 1:27:45.000000 AM' => '2012-10-01 01:27:45',
      '1/1/2012 1:27:45.000000 AM' => '2012-01-01 01:27:45',
      "10/16/10 18:24" => '2010-10-16 18:24:00'
    }

    expectations.each_pair do |raw, formatted|
      converted = DataKit::Converters::DateTime.convert(raw)
      converted.strftime(layout).should == formatted
    end
  end
end
|
@@ -0,0 +1,20 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe DataKit::Converters::Integer do
  # Values are passed through .reformat before matching.
  it "should match values" do
    %w[100 -100 1,000 $1,000].each do |raw|
      cleaned = DataKit::Converters::Integer.reformat(raw)
      DataKit::Converters::Integer.match?(cleaned).should == true
    end
  end

  # Values are passed through .reformat before conversion.
  it "should convert value it can match" do
    expectations = { "100" => 100, "-100" => -100, "1,000" => 1000, "$1,000" => 1000 }

    expectations.each_pair do |raw, expected|
      cleaned = DataKit::Converters::Integer.reformat(raw)
      DataKit::Converters::Integer.convert(cleaned).should == expected
    end
  end
end
|
@@ -0,0 +1,20 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
# Every example below exercises DataKit::Converters::Number, so the group is
# described as Number (it was previously labelled Integer).
describe DataKit::Converters::Number do
  # Values are passed through .reformat before matching.
  it "should match values" do
    ["100.0", "-100.5", "-1,000.00", "5.6E11", "$1,000.21"].each do |number|
      reformatted = DataKit::Converters::Number.reformat(number)
      DataKit::Converters::Number.match?(reformatted).should == true
    end
  end

  # Values are passed through .reformat before conversion.
  it "should convert value it can match" do
    {
      "100.0" => 100.0, "-100.5" => -100.5,
      "-1,000.00" => -1000.00, "5.6E11" => 5.6E11, "$1,000.21" => 1000.21
    }.each do |testcase, result|
      reformatted = DataKit::Converters::Number.reformat(testcase)
      DataKit::Converters::Number.convert(reformatted).should == result
    end
  end
end
|
@@ -0,0 +1,55 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe DataKit::CSV::Analysis do
  # Builds an analysis for +fields+ and inserts each [field, value] pair of
  # +samples+ in the order given.
  def analysis_of(fields, samples)
    DataKit::CSV::Analysis.new(fields).tap do |analysis|
      samples.each { |field, value| analysis.insert(field, value) }
    end
  end

  it "should insert a row for analysis" do
    analysis = analysis_of(['field1'], [['field1', '1.0'], ['field1', '2.0']])

    analysis.type_list('field1').should == [:number]
  end

  it "should know the number of rows with a particular type" do
    analysis = analysis_of(['field1'], [['field1', '1.0'], ['field1', '2.0']])

    analysis.type_count('field1', :number).should == 2
  end

  it "should determine the type of a field" do
    analysis = analysis_of(
      ['field1', 'field2'],
      [['field1', '1.0'], ['field1', '2.0'], ['field2', 'str'], ['field2', 'str2']]
    )

    analysis.type?('field1').should == :number
    analysis.type?('field2').should == :string
  end

  it "should infer a string type if there non-numeric mixed types" do
    analysis = analysis_of(
      ['field1', 'field2'],
      [['field1', '1.0'], ['field1', '2.0'], ['field2', '2.0'], ['field2', 'str2']]
    )

    analysis.type?('field1').should == :number
    analysis.type?('field2').should == :string
  end

  it "should infer a number type if there are mixed numeric types" do
    analysis = analysis_of(['field1'], [['field1', '1.0'], ['field1', '20'], ['field1', nil]])

    analysis.type?('field1').should == :number
  end
end
|
@@ -0,0 +1,56 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe DataKit::CSV::Analyzer do
  # Path of the standard fixture used by every example.
  let(:path) { data_path('standard.csv') }

  # Parser built from a path string.
  let(:csv) { DataKit::CSV::Parser.new(path) }

  # Parser built from an already opened File.
  let(:iocsv) { DataKit::CSV::Parser.new(File.open(path)) }

  it "should initialize" do
    analyzer = DataKit::CSV::Analyzer.new(csv)

    analyzer.csv.should == csv
    analyzer.keys.should == []
    analyzer.sample_rate.should == 0.1
  end

  it "should initialize schema with an IO" do
    analyzer = DataKit::CSV::Analyzer.new(iocsv)

    analyzer.csv.should == iocsv
    analyzer.keys.should == []
    analyzer.sample_rate.should == 0.1
  end

  it "should execute an analysis" do
    analysis = DataKit::CSV::Analyzer.new(csv, :sample_rate => 0.5).execute

    # Expected inferred type for each column of the fixture. The debugging
    # `puts` that used to print the analysis here has been removed so the
    # suite's output stays clean.
    {
      'id' => :integer,
      'first_name' => :string,
      'last_name' => :string,
      'email' => :string,
      'country' => :string,
      'ip_address' => :string,
      'amount' => :number,
      'active' => :boolean,
      'activated_at' => :datetime,
      'address' => :string
    }.each do |field, type|
      analysis.type?(field).should == type
    end

    analysis.row_count.should == 10
    analysis.sample_count.should be < 10
  end

  it "should calculate a sample_rate" do
    DataKit::CSV::Analyzer.sample_rate(1024).should == 1
    DataKit::CSV::Analyzer.sample_rate(2048 * 2048).should be < 1
  end
end
|
@@ -0,0 +1,35 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe DataKit::CSV::Converter do
  # Fresh scratch directory for each example's converted output.
  let(:tmpdir) { Dir.mktmpdir }

  # File the converter writes to.
  let(:target) { File.join(tmpdir, 'data.csv') }

  # Parser over the standard fixture.
  let(:csv) { DataKit::CSV::Parser.new(data_path('standard.csv')) }

  # Remove the scratch directory so runs do not pile up temp files.
  # FileUtils is loaded by the stdlib 'tmpdir' library that provides
  # Dir.mktmpdir.
  after { FileUtils.remove_entry(tmpdir) }

  # Counts the rows written to +path+. CSV.read closes the file when done,
  # unlike a bare CSV.open.
  def written_rows(path)
    CSV.read(path).length
  end

  it "should initialize and execute" do
    analysis = DataKit::CSV::Analyzer.analyze(csv, :sample_rate => 1)
    converter = DataKit::CSV::Converter.new(csv, analysis, target)

    converter.execute

    written_rows(target).should == 11
  end

  it "should convert using the convenience method" do
    analysis = DataKit::CSV::Analyzer.analyze(csv, :sample_rate => 1)
    DataKit::CSV::Converter.convert(csv, analysis, target)

    written_rows(target).should == 11
  end
end
|
@@ -0,0 +1,50 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe DataKit::CSV::Parser do
  let(:path) { data_path('standard.csv') }

  let(:crlf_path) { data_path('carriage_returns.csv') }

  # Number of rows +parser+ yields from #each_row.
  def rows_yielded_by(parser)
    total = 0
    parser.each_row { |_row| total += 1 }
    total
  end

  it "should initialize" do
    parser = DataKit::CSV::Parser.new(path)

    parser.path.should == path
  end

  it "should enumerate rows with a string path" do
    parser = DataKit::CSV::Parser.new(path)

    rows_yielded_by(parser).should == 10
  end

  it "should enumerate rows with an IO path" do
    parser = DataKit::CSV::Parser.new(File.open(path))

    rows_yielded_by(parser).should == 10
  end

  it "should enumerate rows for lines separated by CRLF" do
    parser = DataKit::CSV::Parser.new(File.open(crlf_path))

    rows_yielded_by(parser).should == 10
  end
end
|
@@ -0,0 +1,95 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe DataKit::Dataset::Field do
  # Asserts that Field.type? reports +type+ for each of +raw_values+.
  def infers(type, *raw_values)
    raw_values.each do |raw|
      DataKit::Dataset::Field.type?(raw).should == type
    end
  end

  # Asserts that Field.convert(raw, type) equals the expected value for each
  # raw => expected pair in +table+.
  def converts(type, table)
    table.each_pair do |raw, expected|
      DataKit::Dataset::Field.convert(raw, type).should == expected
    end
  end

  # Asserts the strftime-formatted result of converting +raw+ to :datetime.
  def converts_datetime(raw, formatted)
    converted = DataKit::Dataset::Field.convert(raw, :datetime)
    converted.strftime("%Y-%m-%d %H:%M:%S").should == formatted
  end

  it "should initialize" do
    subject_field = DataKit::Dataset::Field.new('field_name')

    subject_field.name.should == 'field_name'
    subject_field.key?.should == false
    subject_field.type.should == :string
  end

  it "should serialize" do
    serialized = DataKit::Dataset::Field.new('field_name').to_hash

    serialized.should == {
      'name' => 'field_name', 'type' => 'string', 'key' => false
    }
  end

  it "should infer nil types" do
    infers :null, nil
  end

  it "should infer integer types" do
    infers :integer, "100", "-100", "1,000", "$1,000"
  end

  it "should infer numeric types" do
    infers :number, "100.0", "-100.5", "5.6E11", "-1,000.0", "$1,000.0"
  end

  it "should infer date types" do
    # The second value is the short format Excel produces ("Excel makes everyone sad").
    infers :datetime, "2010-01-01", "1/1/00"
  end

  it "should infer date/time types" do
    infers :datetime, "2010-01-01 12:00:00"
  end

  it "should infer boolean types" do
    infers :boolean, "true", "false"
  end

  it "should infer string types" do
    infers :string, "true5", "my string"
  end

  it "should convert nil values" do
    converts :string, nil => nil
  end

  it "should convert integer values" do
    converts :integer, "100" => 100, "-100" => -100, "1,000" => 1_000, "$1,000" => 1_000
  end

  it "should convert numeric values" do
    converts :number,
             "100.0" => 100.0,
             "-100.0" => -100.0,
             "-1,000.0" => -1_000.0,
             "5E5" => 500000.0,
             "$1,000.0" => 1000.0
  end

  it "should convert boolean values" do
    converts :boolean, "true" => true, "false" => false
  end

  it "should convert date values" do
    converts_datetime "2010-01-01", '2010-01-01 00:00:00'
  end

  it "should convert date/time values" do
    converts_datetime "2010-01-01 12:00:00", '2010-01-01 12:00:00'
  end

  it "should convert string values" do
    converts :string, 500 => "500"
  end
end
|
@@ -0,0 +1,22 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe DataKit::Dataset::Schema do
  # A new schema starts without fields.
  it "should initialize" do
    DataKit::Dataset::Schema.new.fields.should == []
  end

  # Only the field created with key: true is expected among the keys.
  it "should return a list of keys" do
    schema = DataKit::Dataset::Schema.new
    plain_field = DataKit::Dataset::Field.new('field')
    key_field = DataKit::Dataset::Field.new('field2', key: true)
    schema.fields << plain_field
    schema.fields << key_field

    schema.keys.length.should == 1
  end

  # The YAML form equals the YAML of each field's hash representation.
  it "should serialize to yaml" do
    schema = DataKit::Dataset::Schema.new
    schema.fields << DataKit::Dataset::Field.new('field')

    expected_yaml = schema.fields.collect(&:to_hash).to_yaml
    schema.to_yaml.should == expected_yaml
  end
end
|
@@ -0,0 +1 @@
|
|
1
|
+
draft_order,player,position,avg_draft_position,avg_bid_value
|
@@ -0,0 +1,11 @@
|
|
1
|
+
id,first_name,last_name,email,country,ip_address,amount,active,activated_at,address
|
2
|
+
1,Todd,Hamilton,thamilton@plambee.edu,Norfolk Island,66.157.128.241,$7.72,true,10/3/1955,0582 Dwight Street
|
3
|
+
2,Melissa,Kelly,mkelly@twinte.name,Singapore,204.221.167.233,$4.33,false,7/9/2013,06 Cardinal Crossing
|
4
|
+
3,Donald,Wheeler,dwheeler@edgeify.mil,Madagascar,34.201.104.193,$2.92,true,12/12/1993,4 Del Sol Hill
|
5
|
+
4,Ruby,Hall,rhall@cogilith.com,USSR,237.243.109.67,$8.27,false,12/15/1975,7 Ramsey Avenue
|
6
|
+
5,Jessica,Cole,jcole@shuffletag.info,Cyprus,25.40.138.137,$8.16,false,6/2/1939,8142 Novick Hill
|
7
|
+
6,Doris,Nelson,dnelson@zoombox.biz,Svalbard and Jan Mayen Islands,233.43.155.229,$6.26,false,5/23/1948,596 Veith Road
|
8
|
+
7,Robert,Hansen,rhansen@miboo.edu,Ghana,41.194.33.211,$4.90,true,9/28/1999,529 Oak Pass
|
9
|
+
8,Matthew,Freeman,mfreeman@midel.name,Sudan,53.186.162.65,$6.63,false,5/24/1996,70682 Declaration Center
|
10
|
+
9,Julia,Nelson,jnelson@skajo.net,Vatican City State (Holy See),249.49.124.178,$9.80,true,1/26/1940,96 Hermina Lane
|
11
|
+
10,Wanda,Palmer,wpalmer@ntags.biz,Indonesia,0.200.163.200,$5.89,false,1/1/1959,26837 Donald Trail
|
data/spec/spec_helper.rb
ADDED
@@ -0,0 +1,18 @@
|
|
1
|
+
require "codeclimate-test-reporter"
|
2
|
+
CodeClimate::TestReporter.start
|
3
|
+
|
4
|
+
require 'rubygems'
|
5
|
+
require 'bundler/setup'
|
6
|
+
|
7
|
+
SimpleCov.start do
|
8
|
+
add_filter "/spec"
|
9
|
+
end
|
10
|
+
|
11
|
+
require 'tmpdir'
|
12
|
+
require 'data_kit'
|
13
|
+
|
14
|
+
# Helpers shared by the specs. Kept in a module (rather than a bare `def`
# inside the configure block, which defines a private method on Object) so
# the helper is only mixed into RSpec example groups.
module DataKitSpecHelpers
  # Returns the path of +file+ inside this directory's `fixtures` folder.
  def data_path(file)
    File.join(File.dirname(__FILE__), 'fixtures', file)
  end
end

RSpec.configure do |config|
  # include: available inside examples, `let` and hooks.
  config.include DataKitSpecHelpers
  # extend: available directly in `describe`/`context` bodies.
  config.extend DataKitSpecHelpers
end
|