daru-io 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +11 -0
- data/.rspec +2 -0
- data/.rspec_formatter.rb +24 -0
- data/.rubocop.yml +109 -0
- data/.travis.yml +30 -0
- data/.yardopts +2 -0
- data/CODE_OF_CONDUCT.md +46 -0
- data/CONTRIBUTING.md +65 -0
- data/Gemfile +20 -0
- data/Guardfile +7 -0
- data/LICENSE.md +21 -0
- data/README.md +654 -0
- data/Rakefile +12 -0
- data/daru-io.gemspec +39 -0
- data/lib/daru/io.rb +3 -0
- data/lib/daru/io/base.rb +45 -0
- data/lib/daru/io/exporters.rb +1 -0
- data/lib/daru/io/exporters/avro.rb +96 -0
- data/lib/daru/io/exporters/base.rb +54 -0
- data/lib/daru/io/exporters/csv.rb +103 -0
- data/lib/daru/io/exporters/excel.rb +148 -0
- data/lib/daru/io/exporters/json.rb +570 -0
- data/lib/daru/io/exporters/r_data.rb +66 -0
- data/lib/daru/io/exporters/rds.rb +79 -0
- data/lib/daru/io/exporters/sql.rb +55 -0
- data/lib/daru/io/importers.rb +1 -0
- data/lib/daru/io/importers/active_record.rb +75 -0
- data/lib/daru/io/importers/avro.rb +54 -0
- data/lib/daru/io/importers/base.rb +62 -0
- data/lib/daru/io/importers/csv.rb +190 -0
- data/lib/daru/io/importers/excel.rb +99 -0
- data/lib/daru/io/importers/excelx.rb +138 -0
- data/lib/daru/io/importers/html.rb +144 -0
- data/lib/daru/io/importers/json.rb +152 -0
- data/lib/daru/io/importers/mongo.rb +139 -0
- data/lib/daru/io/importers/plaintext.rb +97 -0
- data/lib/daru/io/importers/r_data.rb +74 -0
- data/lib/daru/io/importers/rds.rb +67 -0
- data/lib/daru/io/importers/redis.rb +135 -0
- data/lib/daru/io/importers/sql.rb +127 -0
- data/lib/daru/io/link.rb +80 -0
- data/lib/daru/io/version.rb +5 -0
- metadata +269 -0
@@ -0,0 +1,66 @@
|
|
1
|
+
require 'daru/io/exporters/rds'
|
2
|
+
|
3
|
+
module Daru
|
4
|
+
module IO
|
5
|
+
module Exporters
|
6
|
+
# RData Exporter Class, that can be used to export multiple `Daru::DataFrame`s
|
7
|
+
# to a RData file
|
8
|
+
class RData < RDS
|
9
|
+
# Initializes a RData Exporter instance.
|
10
|
+
#
|
11
|
+
# @param options [Hash] A set of key-value pairs wherein the key depicts the name of
|
12
|
+
# the R `data.frame` variable name to be saved in the RData file, and the corresponding
|
13
|
+
# value depicts the `Daru::DataFrame` (or any Ruby variable in scope)
|
14
|
+
#
|
15
|
+
# @example Initializing RData Exporter instance
|
16
|
+
# df1 = Daru::DataFrame.new([[1,2],[3,4]], order: [:a, :b])
|
17
|
+
#
|
18
|
+
# #=> #<Daru::DataFrame(2x2)>
|
19
|
+
# # a b
|
20
|
+
# # 0 1 3
|
21
|
+
# # 1 2 4
|
22
|
+
#
|
23
|
+
# df2 = Daru::DataFrame.new([[5,6],[7,8]], order: [:x, :y])
|
24
|
+
#
|
25
|
+
# #=> #<Daru::DataFrame(2x2)>
|
26
|
+
# # x y
|
27
|
+
# # 0 5 7
|
28
|
+
# # 1 6 8
|
29
|
+
#
|
30
|
+
# instance = Daru::IO::Exporters::RData.new("first.df": df1, "second.df": df2)
|
31
|
+
def initialize(**options)
  # Make sure the optional `rsruby` dependency is available first.
  optional_gem('rsruby')

  # Keyword arguments are stored untouched; #write iterates them as
  # (R variable name, dataframe) pairs.
  @options = options
end
|
36
|
+
|
37
|
+
# Exports a RData Exporter instance to a file-writable String.
|
38
|
+
#
|
39
|
+
# @return [String] A file-writable string
|
40
|
+
#
|
41
|
+
# @example Writing to a RData file
|
42
|
+
# instance.to_s
|
43
|
+
#
|
44
|
+
# #=> "\u001F\x8B\b\u0000\u0000\u0000\u0000\u0000\u0000\u0003\vr\x890\xE2\x8A\xE0b```b..."
|
45
|
+
def to_s
  # Pure delegation: the parent class implementation produces the string.
  super()
end
|
48
|
+
|
49
|
+
# Exports an RData Exporter instance to a rdata file.
|
50
|
+
#
|
51
|
+
# @param path [String] Path of RData file where the dataframe(s) is/are to be saved
|
52
|
+
#
|
53
|
+
# @example Writing to a RData file
|
54
|
+
# instance.write("daru_dataframes.RData")
|
55
|
+
def write(path)
  @instance = RSRuby.instance

  # Each (variable name, dataframe) pair expands to a list of R statements;
  # collect them all into one flat list.
  @statements = @options.flat_map do |r_variable, dataframe|
    process_statements(r_variable, dataframe)
  end

  # The path ends up inside a single-quoted R string literal, so a quote or
  # backslash in it must be escaped or the generated statement is malformed.
  quoted_path = path.to_s.gsub(/['\\]/) { |char| "\\#{char}" }
  @statements << "save(#{@options.keys.map(&:to_s).join(', ')}, file='#{quoted_path}')"

  # Statements are evaluated in order; the list of statements is returned.
  @statements.each { |statement| @instance.eval_R(statement) }
end
|
63
|
+
end
|
64
|
+
end
|
65
|
+
end
|
66
|
+
end
|
@@ -0,0 +1,79 @@
|
|
1
|
+
require 'daru/io/exporters/base'
|
2
|
+
|
3
|
+
module Daru
|
4
|
+
module IO
|
5
|
+
module Exporters
|
6
|
+
# RDS Exporter Class, that extends `to_rds_string` and `write_rds` methods to
|
7
|
+
# `Daru::DataFrame` instance variables
|
8
|
+
class RDS < Base
|
9
|
+
Daru::DataFrame.register_io_module :to_rds_string, self
|
10
|
+
Daru::DataFrame.register_io_module :write_rds, self
|
11
|
+
|
12
|
+
# Initializes a RDS Exporter instance.
|
13
|
+
#
|
14
|
+
# @param dataframe [Daru::DataFrame] A dataframe to export
|
15
|
+
# @param r_variable [String] Name of the R `data.frame` variable name to be saved in the RDS file
|
16
|
+
#
|
17
|
+
# @example Initializing an RDS Exporter
|
18
|
+
# df = Daru::DataFrame.new([[1,2],[3,4]], order: [:a, :b])
|
19
|
+
#
|
20
|
+
# #=> #<Daru::DataFrame(2x2)>
|
21
|
+
# # a b
|
22
|
+
# # 0 1 3
|
23
|
+
# # 1 2 4
|
24
|
+
#
|
25
|
+
# instance = Daru::IO::Exporters::RDS.new(df, "sample.dataframe")
|
26
|
+
def initialize(dataframe, r_variable)
  # Make sure the optional `rsruby` dependency is available first.
  optional_gem('rsruby')

  # The parent initializer receives the dataframe; the R variable name is
  # kept here for #write.
  super(dataframe)
  @r_variable = r_variable
end
|
32
|
+
|
33
|
+
# Exports a RDS Exporter instance to a file-writable String.
|
34
|
+
#
|
35
|
+
# @return [String] A file-writable string
|
36
|
+
#
|
37
|
+
# @example Getting a file-writable string from RDS Exporter instance
|
38
|
+
# instance.to_s #! same as df.to_rds_string("sample.dataframe")
|
39
|
+
#
|
40
|
+
# #=> "\u001F\x8B\b\u0000\u0000\u0000\u0000\u0000\u0000\u0003\x8B\xE0b```b..."
|
41
|
+
def to_s
  # Pure delegation: the parent class implementation produces the string.
  super()
end
|
44
|
+
|
45
|
+
# Exports a RDS Exporter instance to a rds file.
|
46
|
+
#
|
47
|
+
# @param path [String] Path of RDS file where the dataframe is to be saved
|
48
|
+
#
|
49
|
+
# @example Writing an RDS Exporter instance to a rds file
|
50
|
+
# instance.write("daru_dataframe.rds")
|
51
|
+
def write(path)
  @instance = RSRuby.instance
  @statements = process_statements(@r_variable, @dataframe)

  # The path ends up inside a single-quoted R string literal, so a quote or
  # backslash in it must be escaped or the generated statement is malformed.
  quoted_path = path.to_s.gsub(/['\\]/) { |char| "\\#{char}" }
  @statements << "saveRDS(#{@r_variable}, file='#{quoted_path}')"

  # Statements are evaluated in order; the list of statements is returned.
  @statements.each { |statement| @instance.eval_R(statement) }
end
|
57
|
+
|
58
|
+
private
|
59
|
+
|
60
|
+
# Builds the R statements that recreate `dataframe` under the name
# `r_variable`: one `name = c(...)` assignment per vector, followed by a
# `data.frame(...)` call that combines them.
def process_statements(r_variable, dataframe)
  assignments = dataframe.map_vectors_with_index do |vector, name|
    elements = vector.to_a.map { |val| convert_datatype(val) }
    "#{name} = c(#{elements.join(', ')})"
  end
  column_names = dataframe.vectors.to_a.map(&:to_s)

  [*assignments, "#{r_variable} = data.frame(#{column_names.join(', ')})"]
end
|
68
|
+
|
69
|
+
# Converts a single Ruby value into the text used for it inside an R `c(...)`
# call. Values without a dedicated branch are returned unchanged and rely on
# string interpolation at the call site.
def convert_datatype(value)
  case value
  when nil then 'NA'
  # R spells its logical constants in upper case; `true`/`false` are undefined there.
  when true then 'TRUE'
  when false then 'FALSE'
  # Escape backslashes and single quotes so the value cannot terminate the
  # single-quoted R string literal early.
  when String then "'#{value.gsub(/['\\]/) { |char| "\\#{char}" }}'"
  else value
  end
end
|
76
|
+
end
|
77
|
+
end
|
78
|
+
end
|
79
|
+
end
|
@@ -0,0 +1,55 @@
|
|
1
|
+
require 'daru/io/exporters/base'
|
2
|
+
|
3
|
+
module Daru
|
4
|
+
module IO
|
5
|
+
module Exporters
|
6
|
+
# SQL Exporter Class, that extends `to_sql` method to `Daru::DataFrame`
|
7
|
+
# instance variables
|
8
|
+
class SQL < Base
|
9
|
+
Daru::DataFrame.register_io_module :to_sql, self
|
10
|
+
|
11
|
+
# Initializes a SQL Exporter instance.
|
12
|
+
#
|
13
|
+
# @param dataframe [Daru::DataFrame] A dataframe to export.
|
14
|
+
# @param dbh [DBI] A DBI database connection object.
|
15
|
+
# @param table [String] The SQL table to export to.
|
16
|
+
#
|
17
|
+
# @example Initializing with database credentials
|
18
|
+
# df = Daru::DataFrame.new([[1,2],[3,4]], order: [:a, :b])
|
19
|
+
#
|
20
|
+
# #=> #<Daru::DataFrame(2x2)>
|
21
|
+
# # a b
|
22
|
+
# # 0 1 3
|
23
|
+
# # 1 2 4
|
24
|
+
#
|
25
|
+
# table = 'test'
|
26
|
+
#
|
27
|
+
# dbh = DBI.connect("DBI:Mysql:database:localhost", "user", "password")
|
28
|
+
# # Enter the actual SQL database credentials in the above line
|
29
|
+
#
|
30
|
+
# instance = Daru::IO::Exporters::SQL.new(df, dbh, table)
|
31
|
+
def initialize(dataframe, dbh, table)
  # Optional runtime dependencies, requested in the same order as before.
  optional_gem('dbd-sqlite3', requires: 'dbd/SQLite3')
  optional_gem('dbi')
  optional_gem('sqlite3')

  # The parent initializer receives the dataframe; connection handle and
  # table name are kept for #to.
  super(dataframe)
  @dbh = dbh
  @table = table
end
|
40
|
+
|
41
|
+
# Exports a SQL Exporter instance to an SQL table.
|
42
|
+
#
|
43
|
+
# @example Exports SQL Exporter instance into given SQL table
|
44
|
+
# instance.to
|
45
|
+
def to
  # NOTE(review): table and column names are interpolated into the SQL text
  # (identifiers cannot be bound as `?` parameters) — callers must pass
  # trusted names. Row values are bound through placeholders.
  columns = @dataframe.vectors.to_a.join(',')
  placeholders = (['?'] * @dataframe.vectors.size).join(',')
  query = "INSERT INTO #{@table} (#{columns}) VALUES (#{placeholders})"

  sth = @dbh.prepare(query)
  begin
    @dataframe.each_row { |row| sth.execute(*row.to_a) }
  ensure
    # Release the prepared statement even when an insert fails.
    sth.finish if sth.respond_to?(:finish)
  end
  true
end
|
52
|
+
end
|
53
|
+
end
|
54
|
+
end
|
55
|
+
end
|
@@ -0,0 +1 @@
|
|
1
|
+
# Require every importer that lives in the sibling `importers/` directory,
# using its load-path-relative name ("daru/io/importers/<name>.rb").
importer_files = Dir["#{__dir__}/importers/*.rb"]
importer_files.each do |file|
  require "daru/io#{file.gsub(__dir__, '')}"
end
|
@@ -0,0 +1,75 @@
|
|
1
|
+
require 'daru/io/importers/base'
|
2
|
+
|
3
|
+
module Daru
|
4
|
+
module IO
|
5
|
+
module Importers
|
6
|
+
# ActiveRecord Importer Class, that extends `from_activerecord` method to
|
7
|
+
# `Daru::DataFrame`
|
8
|
+
class ActiveRecord < Base
|
9
|
+
Daru::DataFrame.register_io_module :from_activerecord, self
|
10
|
+
|
11
|
+
# Checks for required gem dependencies of ActiveRecord Importer
|
12
|
+
def initialize
  # ActiveRecord is an optional dependency, pinned to the 4.x series and
  # loaded through its `active_record` entry point.
  optional_gem('activerecord', '~> 4.0', requires: 'active_record')
end
|
15
|
+
|
16
|
+
# Loads data from a given relation
|
17
|
+
#
|
18
|
+
# @!method self.from(relation)
|
19
|
+
#
|
20
|
+
# @param relation [ActiveRecord::Relation] A relation to be used to load
|
21
|
+
# the contents of DataFrame
|
22
|
+
#
|
23
|
+
# @return [Daru::IO::Importers::ActiveRecord]
|
24
|
+
#
|
25
|
+
# @example Loading from a ActiveRecord instance
|
26
|
+
# instance = Daru::IO::Importers::ActiveRecord.from(Account.all)
|
27
|
+
def from(relation)
  # Remember the relation and hand the importer back so calls can be chained.
  tap { @relation = relation }
end
|
31
|
+
|
32
|
+
# Imports a `Daru::DataFrame` from an ActiveRecord Importer instance
|
33
|
+
#
|
34
|
+
# @param fields [String or Array of Strings] A set of fields to load from.
|
35
|
+
#
|
36
|
+
# @return [Daru::DataFrame]
|
37
|
+
#
|
38
|
+
# @example Importing from an instance without specifying fields
|
39
|
+
# instance.call
|
40
|
+
#
|
41
|
+
# #=> #<Daru::DataFrame(2x3)>
|
42
|
+
# #=> id name age
|
43
|
+
# #=> 0 1 Homer 20
|
44
|
+
# #=> 1 2 Marge 30
|
45
|
+
#
|
46
|
+
# @example Importing from an instance with specific fields
|
47
|
+
# instance.call(:id, :name)
|
48
|
+
#
|
49
|
+
# #=> #<Daru::DataFrame(2x2)>
|
50
|
+
# #=> id name
|
51
|
+
# #=> 0 1 Homer
|
52
|
+
# #=> 1 2 Marge
|
53
|
+
def call(*fields)
  @fields = fields.map(&:to_sym)

  # Without explicit fields every attribute of every record is loaded.
  if @fields.empty?
    records = @relation.map { |record| record.attributes.symbolize_keys }
    return Daru::DataFrame.new(records)
  end

  vectors = @fields.map { |name| [name, Daru::Vector.new([], name: name)] }.to_h

  # With a single field, `pluck` yields bare values rather than one-element
  # rows. Wrap them explicitly: `Array(value)` would turn nil into an empty
  # row and would spread Array/Hash values across several cells.
  single_field = @fields.size == 1

  Daru::DataFrame.new(vectors, order: @fields).tap do |df|
    @relation.pluck(*@fields).each do |record|
      df.add_row(single_field ? [record] : record)
    end
    df.update
  end
end
|
72
|
+
end
|
73
|
+
end
|
74
|
+
end
|
75
|
+
end
|
@@ -0,0 +1,54 @@
|
|
1
|
+
require 'daru/io/importers/base'
|
2
|
+
|
3
|
+
module Daru
|
4
|
+
module IO
|
5
|
+
module Importers
|
6
|
+
# Avro Importer Class, that extends `read_avro` method to `Daru::DataFrame`
|
7
|
+
class Avro < Base
|
8
|
+
Daru::DataFrame.register_io_module :read_avro, self
|
9
|
+
|
10
|
+
# Checks for required gem dependencies of Avro Importer
|
11
|
+
#
|
12
|
+
# @note The 'snappy' gem handles compressions and is used within Avro gem. Yet, it isn't
|
13
|
+
# specified as a dependency in Avro gem. Hence, it has been added separately.
|
14
|
+
def initialize
  # Both gems are optional dependencies; `avro` is requested before `snappy`.
  %w[avro snappy].each { |gem_name| optional_gem(gem_name) }
end
|
18
|
+
|
19
|
+
# Reads data from an avro file
|
20
|
+
#
|
21
|
+
# @!method self.read(path)
|
22
|
+
#
|
23
|
+
# @param path [String] Path to Avro file, where the dataframe is to be imported from.
|
24
|
+
#
|
25
|
+
# @return [Daru::IO::Importers::Avro]
|
26
|
+
#
|
27
|
+
# @example Reading from avro file
|
28
|
+
# instance = Daru::IO::Importers::Avro.read("azorahai.avro")
|
29
|
+
def read(path)
  @path = path
  # Avro container files are binary, so read the bytes without any newline
  # or encoding translation.
  @buffer = StringIO.new(File.binread(@path))
  # All records are decoded eagerly here; #call only wraps them in a DataFrame.
  @data = ::Avro::DataFile::Reader.new(@buffer, ::Avro::IO::DatumReader.new).to_a
  self
end
|
35
|
+
|
36
|
+
# Imports a `Daru::DataFrame` from an Avro Importer instance
|
37
|
+
#
|
38
|
+
# @return [Daru::DataFrame]
|
39
|
+
#
|
40
|
+
# @example Importing from an Avro file
|
41
|
+
# df = instance.call
|
42
|
+
#
|
43
|
+
# #=> #<Daru::DataFrame(3x3)>
|
44
|
+
# # name points winner
|
45
|
+
# # 0 Dany 100 true
|
46
|
+
# # 1 Jon 100 true
|
47
|
+
# # 2 Tyrion 100 true
|
48
|
+
def call
  # The records decoded by #read are handed straight to the DataFrame constructor.
  records = @data
  Daru::DataFrame.new(records)
end
|
51
|
+
end
|
52
|
+
end
|
53
|
+
end
|
54
|
+
end
|
@@ -0,0 +1,62 @@
|
|
1
|
+
require 'daru/io/base'
|
2
|
+
|
3
|
+
module Daru
|
4
|
+
module IO
|
5
|
+
module Importers
|
6
|
+
# Base Importer Class that contains generic helper methods, to be
|
7
|
+
# used by other Importers via inheritance
|
8
|
+
class Base < Daru::IO::Base
|
9
|
+
# Guesses the `Daru::DataFrame` from the parsed set of key-value pairs.
|
10
|
+
#
|
11
|
+
# @param keys [Array] A set of keys from given key-value pairs
|
12
|
+
# @param vals [Array] A set of values from given key-value pairs
|
13
|
+
#
|
14
|
+
# @example When the values of the key-value pairs are Arrays of Hashes
|
15
|
+
# Daru::IO::Importers::Base.guess_parse([:a], [[{ x: 1, y: 2 },{ x: 3, y: 4 }]])
|
16
|
+
#
|
17
|
+
# #=> #<Daru::DataFrame(2x2)>
|
18
|
+
# # x y
|
19
|
+
# # 0 1 2
|
20
|
+
# # 1 3 4
|
21
|
+
#
|
22
|
+
# @example When key-value pairs contains values that is Arrays
|
23
|
+
# Daru::IO::Importers::Base.guess_parse([:x, :y], [[1,3], [2,4]])
|
24
|
+
#
|
25
|
+
# #=> #<Daru::DataFrame(2x2)>
|
26
|
+
# # x y
|
27
|
+
# # 0 1 2
|
28
|
+
# # 1 3 4
|
29
|
+
#
|
30
|
+
# @example When the values are Hashes, one per key (the keys become the index)
|
31
|
+
# Daru::IO::Importers::Base.guess_parse([:a, :b], [{ x: 1, y: 2 }, { x: 3, y: 4 }])
|
32
|
+
#
|
33
|
+
# #=> #<Daru::DataFrame(2x2)>
|
34
|
+
# # x y
|
35
|
+
# # a 1 2
|
36
|
+
# # b 3 4
|
37
|
+
def self.guess_parse(keys, vals)
  head = vals.first

  if head.is_a?(Array)
    # Arrays of Hashes: every Hash becomes one row, keys are ignored.
    return Daru::DataFrame.new(vals.flatten) if head.first.is_a?(Hash)

    # Plain Arrays: each value is a column named by the matching key.
    Daru::DataFrame.rows(vals.transpose, order: keys)
  elsif head.is_a?(Hash)
    # Hashes: every Hash becomes one row, indexed by the matching key.
    Daru::DataFrame.new(vals.flatten, index: keys)
  end
  # Any other shape (including empty `vals`) falls through and returns nil.
end
|
47
|
+
|
48
|
+
# Adds the `from` class method to all inheriting children Importer classes, which
|
49
|
+
# calls corresponding Importer's `initialize` and instance method `from`.
|
50
|
+
def self.from(relation)
  # Build a fresh importer, then delegate to its instance-level `from`.
  importer = new
  importer.from(relation)
end
|
53
|
+
|
54
|
+
# Adds the `read` class method to all inheriting children Importer classes, which
|
55
|
+
# calls corresponding Importer's `initialize` and instance method `read`.
|
56
|
+
def self.read(path)
  # Build a fresh importer, then delegate to its instance-level `read`.
  importer = new
  importer.read(path)
end
|
59
|
+
end
|
60
|
+
end
|
61
|
+
end
|
62
|
+
end
|
@@ -0,0 +1,190 @@
|
|
1
|
+
require 'daru/io/importers/base'
|
2
|
+
|
3
|
+
module Daru
|
4
|
+
module IO
|
5
|
+
module Importers
|
6
|
+
# CSV Importer Class, that extends `read_csv` method to `Daru::DataFrame`
|
7
|
+
class CSV < Base
|
8
|
+
Daru::DataFrame.register_io_module :read_csv, self
|
9
|
+
|
10
|
+
# Extra field converters offered on top of the CSV standard library ones.
# `:boolean` maps the words "true"/"false" (any case, surrounding whitespace
# ignored) to real booleans and returns every other field unchanged.
CONVERTERS = {
  boolean: lambda { |field, _field_info|
    { 'true' => true, 'false' => false }.fetch(field.downcase.strip) { field }
  }
}.freeze
|
19
|
+
|
20
|
+
# Checks for required gem dependencies of CSV Importer
|
21
|
+
def initialize
  # Standard libraries are loaded when an importer is built rather than when
  # the file is loaded.
  %w[csv open-uri zlib].each { |library| require library }
end
|
26
|
+
|
27
|
+
# Reads data from a csv / csv.gz file
|
28
|
+
#
|
29
|
+
# @!method self.read(path)
|
30
|
+
#
|
31
|
+
# @param path [String] Path to csv / csv.gz file, where the dataframe is to be imported
|
32
|
+
# from.
|
33
|
+
#
|
34
|
+
# @return [Daru::IO::Importers::CSV]
|
35
|
+
#
|
36
|
+
# @example Reading from csv file
|
37
|
+
# instance = Daru::IO::Importers::CSV.read("matrix_test.csv")
|
38
|
+
#
|
39
|
+
# @example Reading from csv.gz file
|
40
|
+
# instance = Daru::IO::Importers::CSV.read("matrix_test.csv.gz")
|
41
|
+
def read(path)
  @path = path
  @file_data =
    if path.to_s =~ %r{\A[A-Za-z][A-Za-z0-9+\-.]*://}
      # Remote source: Kernel#open no longer opens URLs on Ruby 3.0+, so use
      # URI.open where open-uri provides it and fall back only on Rubies
      # that predate it.
      URI.respond_to?(:open) ? URI.open(path) : open(path)
    else
      # Local source: File.open never interprets a leading "|" as a command
      # to spawn, unlike Kernel#open.
      File.open(path)
    end
  # The handle is left open on purpose: the data is only consumed later,
  # when the importer is called.
  self
end
|
46
|
+
|
47
|
+
# Imports a `Daru::DataFrame` from a CSV Importer instance
|
48
|
+
#
|
49
|
+
# @param headers [Boolean] If this option is `true`, only those columns
|
50
|
+
# will be used to import the `Daru::DataFrame` whose header is given.
|
51
|
+
# @param skiprows [Integer] Skips the first `:skiprows` number of rows from
|
52
|
+
# the CSV file. Defaults to 0.
|
53
|
+
# @param compression [Symbol] Defaults to `:infer`, to parse depending on file format
|
54
|
+
# like `.csv.gz`. For explicitly parsing data from a `.csv.gz` file, set
|
55
|
+
# `:compression` as `:gzip`.
|
56
|
+
# @param clone [Boolean] Have a look at `:clone` option
|
57
|
+
# [here](http://www.rubydoc.info/gems/daru/0.1.5/Daru%2FDataFrame:initialize)
|
58
|
+
# @param index [Array or Daru::Index or Daru::MultiIndex] Have a look at
|
59
|
+
# `:index` option
|
60
|
+
# [here](http://www.rubydoc.info/gems/daru/0.1.5/Daru%2FDataFrame:initialize)
|
61
|
+
# @param order [Array or Daru::Index or Daru::MultiIndex] Have a look at
|
62
|
+
# `:order` option
|
63
|
+
# [here](http://www.rubydoc.info/gems/daru/0.1.5/Daru%2FDataFrame:initialize)
|
64
|
+
# @param name [String] Have a look at `:name` option
|
65
|
+
# [here](http://www.rubydoc.info/gems/daru/0.1.5/Daru%2FDataFrame:initialize)
|
66
|
+
# @param options [Hash] CSV standard library options such as `:col_sep`
|
67
|
+
# (defaults to `','`), `:converters` (defaults to `:numeric`),
|
68
|
+
# `:header_converters` (defaults to `:symbol`).
|
69
|
+
#
|
70
|
+
# @return [Daru::DataFrame]
|
71
|
+
#
|
72
|
+
# @example Calling with csv options
|
73
|
+
# df = instance.call(col_sep: ' ', headers: true)
|
74
|
+
#
|
75
|
+
# #=> #<Daru::DataFrame(99x3)>
|
76
|
+
# # image_reso mls true_trans
|
77
|
+
# # 0 6.55779 0 -0.2362347
|
78
|
+
# # 1 2.14746 0 -0.1539447
|
79
|
+
# # 2 8.31104 0 0.3832846,
|
80
|
+
# # 3 3.47872 0 0.3832846,
|
81
|
+
# # 4 4.16725 0 -0.2362347
|
82
|
+
# # 5 5.79983 0 -0.2362347
|
83
|
+
# # 6 1.9058 0 -0.895577,
|
84
|
+
# # 7 1.9058 0 -0.2362347
|
85
|
+
# # 8 4.11806 0 -0.895577,
|
86
|
+
# # 9 6.26622 0 -0.2362347
|
87
|
+
# # 10 2.57805 0 -0.1539447
|
88
|
+
# # 11 4.76151 0 -0.2362347
|
89
|
+
# # 12 7.11002 0 -0.895577,
|
90
|
+
# # 13 5.40811 0 -0.2362347
|
91
|
+
# # 14 8.19567 0 -0.1539447
|
92
|
+
# # ... ... ... ...
|
93
|
+
#
|
94
|
+
# @example Calling with csv.gz options
|
95
|
+
# df = instance.call(compression: :gzip, col_sep: ' ', headers: true)
|
96
|
+
#
|
97
|
+
# #=> #<Daru::DataFrame(99x3)>
|
98
|
+
# # image_reso mls true_trans
|
99
|
+
# # 0 6.55779 0 -0.2362347
|
100
|
+
# # 1 2.14746 0 -0.1539447
|
101
|
+
# # 2 8.31104 0 0.3832846,
|
102
|
+
# # 3 3.47872 0 0.3832846,
|
103
|
+
# # 4 4.16725 0 -0.2362347
|
104
|
+
# # 5 5.79983 0 -0.2362347
|
105
|
+
# # 6 1.9058 0 -0.895577,
|
106
|
+
# # 7 1.9058 0 -0.2362347
|
107
|
+
# # 8 4.11806 0 -0.895577,
|
108
|
+
# # 9 6.26622 0 -0.2362347
|
109
|
+
# # 10 2.57805 0 -0.1539447
|
110
|
+
# # 11 4.76151 0 -0.2362347
|
111
|
+
# # 12 7.11002 0 -0.895577,
|
112
|
+
# # 13 5.40811 0 -0.2362347
|
113
|
+
# # 14 8.19567 0 -0.1539447
|
114
|
+
# # ... ... ... ...
|
115
|
+
def call(headers: nil, skiprows: 0, compression: :infer,
         clone: nil, index: nil, order: nil, name: nil, **options)
  init_opts(headers: headers, skiprows: skiprows, compression: compression,
            clone: clone, index: index, order: order, name: name, **options)
  process_compression

  columns = @headers ? hash_with_headers : hash_without_headers
  # When :headers is not given, the first row supplies the (de-duplicated)
  # column names, and those names define the vector order.
  @daru_options[:order] = columns.keys unless @headers

  Daru::DataFrame.new(columns, @daru_options)
end
|
132
|
+
|
133
|
+
private
|
134
|
+
|
135
|
+
# True when `algorithm` was requested explicitly through the :compression
# option, or when the path ends with one of the given file extensions.
def compression?(algorithm, *formats)
  return true if @compression == algorithm

  formats.any? { |extension| @path.end_with?(extension) }
end
|
138
|
+
|
139
|
+
# Parses the data with the CSV :headers option in effect and returns a Hash
# of column name => column values with the first @skiprows data rows dropped.
# An optional block receives the parsed table before it is split by column.
def hash_with_headers
  ::CSV
    .parse(@file_data, **@options) # options are keywords: a positional Hash raises on Ruby 3
    .tap { |table| yield table if block_given? }
    .by_col
    .map do |col_name, values|
      # A nil column, or skipping more rows than exist, yields an empty
      # column rather than nil.
      [col_name, (values || [])[@skiprows..-1] || []]
    end
    .to_h
end
|
149
|
+
|
150
|
+
# Parses the data without the CSV :headers option: the first row supplies
# the column names (passed through ArrayHelper.recode_repeated), the next
# @skiprows rows are dropped, and the rest is returned as a Hash of
# column name => column values. An optional block receives the parsed rows.
def hash_without_headers
  rows =
    ::CSV
    .parse(@file_data, **@options) # options are keywords: a positional Hash raises on Ruby 3
    .tap { |parsed| yield parsed if block_given? }
    .to_a
  headers = ArrayHelper.recode_repeated(rows.shift)
  # Skipping more rows than exist leaves every column empty instead of
  # calling `transpose` on nil.
  columns = (rows[@skiprows..-1] || []).transpose

  headers
    .each_with_index
    .map { |header, position| [header, columns[position] || []] }
    .to_h
end
|
165
|
+
|
166
|
+
# Stores the importer settings and splits the remaining keyword arguments
# into DataFrame options (@daru_options) and CSV library options (@options).
def init_opts(headers: nil, skiprows: 0, compression: :infer,
              clone: nil, index: nil, order: nil, name: nil, **options)
  @headers = headers
  @skiprows = skiprows
  @compression = compression
  @daru_options = {clone: clone, index: index, order: order, name: name}
  # Caller-supplied CSV options override these defaults.
  @options = {
    col_sep: ',', converters: [:numeric], header_converters: :symbol,
    headers: @headers, skip_blanks: true
  }.merge(options)

  # Accept a single converter (e.g. `converters: :numeric`) as well as a
  # list. Names known to the CSV library or to CONVERTERS are replaced by
  # their definitions; anything else (such as a custom lambda) passes
  # through unchanged.
  @options[:converters] = Array(@options[:converters]).flat_map do |converter|
    ::CSV::Converters[converter] || CONVERTERS[converter] || converter
  end
end
|
183
|
+
|
184
|
+
# Replaces @file_data with its decompressed contents when gzip compression
# was requested or the path ends in `.csv.gz`; otherwise leaves it untouched
# and returns nil.
def process_compression
  return unless compression?(:gzip, '.csv.gz')

  # The block form closes the GzipReader, and the IO it wraps, once the
  # payload has been read, so the handle does not stay open.
  @file_data = ::Zlib::GzipReader.wrap(@file_data, &:read)
end
|
187
|
+
end
|
188
|
+
end
|
189
|
+
end
|
190
|
+
end
|