daru-io 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +11 -0
- data/.rspec +2 -0
- data/.rspec_formatter.rb +24 -0
- data/.rubocop.yml +109 -0
- data/.travis.yml +30 -0
- data/.yardopts +2 -0
- data/CODE_OF_CONDUCT.md +46 -0
- data/CONTRIBUTING.md +65 -0
- data/Gemfile +20 -0
- data/Guardfile +7 -0
- data/LICENSE.md +21 -0
- data/README.md +654 -0
- data/Rakefile +12 -0
- data/daru-io.gemspec +39 -0
- data/lib/daru/io.rb +3 -0
- data/lib/daru/io/base.rb +45 -0
- data/lib/daru/io/exporters.rb +1 -0
- data/lib/daru/io/exporters/avro.rb +96 -0
- data/lib/daru/io/exporters/base.rb +54 -0
- data/lib/daru/io/exporters/csv.rb +103 -0
- data/lib/daru/io/exporters/excel.rb +148 -0
- data/lib/daru/io/exporters/json.rb +570 -0
- data/lib/daru/io/exporters/r_data.rb +66 -0
- data/lib/daru/io/exporters/rds.rb +79 -0
- data/lib/daru/io/exporters/sql.rb +55 -0
- data/lib/daru/io/importers.rb +1 -0
- data/lib/daru/io/importers/active_record.rb +75 -0
- data/lib/daru/io/importers/avro.rb +54 -0
- data/lib/daru/io/importers/base.rb +62 -0
- data/lib/daru/io/importers/csv.rb +190 -0
- data/lib/daru/io/importers/excel.rb +99 -0
- data/lib/daru/io/importers/excelx.rb +138 -0
- data/lib/daru/io/importers/html.rb +144 -0
- data/lib/daru/io/importers/json.rb +152 -0
- data/lib/daru/io/importers/mongo.rb +139 -0
- data/lib/daru/io/importers/plaintext.rb +97 -0
- data/lib/daru/io/importers/r_data.rb +74 -0
- data/lib/daru/io/importers/rds.rb +67 -0
- data/lib/daru/io/importers/redis.rb +135 -0
- data/lib/daru/io/importers/sql.rb +127 -0
- data/lib/daru/io/link.rb +80 -0
- data/lib/daru/io/version.rb +5 -0
- metadata +269 -0
@@ -0,0 +1,66 @@
|
|
1
|
+
require 'daru/io/exporters/rds'
|
2
|
+
|
3
|
+
module Daru
|
4
|
+
module IO
|
5
|
+
module Exporters
|
6
|
+
# RData Exporter Class, that can be used to export multiple `Daru::DataFrame`s
|
7
|
+
# to a RData file
|
8
|
+
class RData < RDS
|
9
|
+
# Initializes a RData Exporter instance.
|
10
|
+
#
|
11
|
+
# @param options [Hash] A set of key-value pairs wherein the key depicts the name of
|
12
|
+
# the R `data.frame` variable name to be saved in the RData file, and the corresponding
|
13
|
+
# value depicts the `Daru::DataFrame` (or any Ruby variable in scope)
|
14
|
+
#
|
15
|
+
# @example Initializing RData Exporter instance
|
16
|
+
# df1 = Daru::DataFrame.new([[1,2],[3,4]], order: [:a, :b])
|
17
|
+
#
|
18
|
+
# #=> #<Daru::DataFrame(2x2)>
|
19
|
+
# # a b
|
20
|
+
# # 0 1 3
|
21
|
+
# # 1 2 4
|
22
|
+
#
|
23
|
+
# df2 = Daru::DataFrame.new([[5,6],[7,8]], order: [:x, :y])
|
24
|
+
#
|
25
|
+
# #=> #<Daru::DataFrame(2x2)>
|
26
|
+
# # x y
|
27
|
+
# # 0 5 7
|
28
|
+
# # 1 6 8
|
29
|
+
#
|
30
|
+
# instance = Daru::IO::Exporters::RData.new("first.df": df1, "second.df": df2)
|
31
|
+
def initialize(**options)
  # rsruby is an optional dependency; it is declared through the shared
  # optional_gem helper before any state is stored.
  optional_gem('rsruby')

  # Keys are later used as R variable names and values as the objects to be
  # saved under those names (see #write).
  @options = options
end
|
36
|
+
|
37
|
+
# Exports a RData Exporter instance to a file-writable String.
|
38
|
+
#
|
39
|
+
# @return [String] A file-writable string
|
40
|
+
#
|
41
|
+
# @example Writing to a RData file
|
42
|
+
# instance.to_s
|
43
|
+
#
|
44
|
+
# #=> "\u001F\x8B\b\u0000\u0000\u0000\u0000\u0000\u0000\u0003\vr\x890\xE2\x8A\xE0b```b..."
|
45
|
+
def to_s
  # Pure delegation: the method exists so the RData-specific documentation
  # has a place to live; the work is done by the parent implementation.
  super()
end
|
48
|
+
|
49
|
+
# Exports an RData Exporter instance to a rdata file.
|
50
|
+
#
|
51
|
+
# @param path [String] Path of RData file where the dataframe(s) is/are to be saved
|
52
|
+
#
|
53
|
+
# @example Writing to a RData file
|
54
|
+
# instance.write("daru_dataframes.RData")
|
55
|
+
def write(path)
  @instance = RSRuby.instance

  # One group of R statements per (variable name, dataframe) pair.
  @statements = @options.flat_map do |r_variable, dataframe|
    process_statements(r_variable, dataframe)
  end

  # The path ends up inside a single-quoted R string literal, so backslashes
  # (Windows paths) and apostrophes must be escaped or the generated
  # statement is malformed.
  r_path = path.to_s.gsub(/[\\']/) { |char| "\\#{char}" }
  @statements << "save(#{@options.keys.map(&:to_s).join(', ')}, file='#{r_path}')"

  # Evaluate everything in order; returns the list of evaluated statements.
  @statements.each { |statement| @instance.eval_R(statement) }
end
|
63
|
+
end
|
64
|
+
end
|
65
|
+
end
|
66
|
+
end
|
@@ -0,0 +1,79 @@
|
|
1
|
+
require 'daru/io/exporters/base'
|
2
|
+
|
3
|
+
module Daru
|
4
|
+
module IO
|
5
|
+
module Exporters
|
6
|
+
# RDS Exporter Class, that extends `to_rds_string` and `write_rds` methods to
|
7
|
+
# `Daru::DataFrame` instance variables
|
8
|
+
class RDS < Base
|
9
|
+
Daru::DataFrame.register_io_module :to_rds_string, self
|
10
|
+
Daru::DataFrame.register_io_module :write_rds, self
|
11
|
+
|
12
|
+
# Initializes a RDS Exporter instance.
|
13
|
+
#
|
14
|
+
# @param dataframe [Daru::DataFrame] A dataframe to export
|
15
|
+
# @param r_variable [String] Name of the R `data.frame` variable name to be saved in the RDS file
|
16
|
+
#
|
17
|
+
# @example Initializing an RDS Exporter
|
18
|
+
# df = Daru::DataFrame.new([[1,2],[3,4]], order: [:a, :b])
|
19
|
+
#
|
20
|
+
# #=> #<Daru::DataFrame(2x2)>
|
21
|
+
# # a b
|
22
|
+
# # 0 1 3
|
23
|
+
# # 1 2 4
|
24
|
+
#
|
25
|
+
# instance = Daru::IO::Exporters::RDS.new(df, "sample.dataframe")
|
26
|
+
def initialize(dataframe, r_variable)
  # rsruby is an optional dependency, declared through the shared helper.
  optional_gem('rsruby')

  # The parent initializer receives the dataframe; the R variable name is
  # kept here for use when the R statements are generated.
  super(dataframe)
  @r_variable = r_variable
end
|
32
|
+
|
33
|
+
# Exports a RDS Exporter instance to a file-writable String.
|
34
|
+
#
|
35
|
+
# @return [String] A file-writable string
|
36
|
+
#
|
37
|
+
# @example Getting a file-writable string from RDS Exporter instance
|
38
|
+
# instance.to_s #! same as df.to_rds_string("sample.dataframe")
|
39
|
+
#
|
40
|
+
# #=> "\u001F\x8B\b\u0000\u0000\u0000\u0000\u0000\u0000\u0003\x8B\xE0b```b..."
|
41
|
+
def to_s
  # Pure delegation to the parent implementation; kept as a separate method
  # so the RDS-specific documentation is attached to it.
  super()
end
|
44
|
+
|
45
|
+
# Exports a RDS Exporter instance to a rds file.
|
46
|
+
#
|
47
|
+
# @param path [String] Path of RDS file where the dataframe is to be saved
|
48
|
+
#
|
49
|
+
# @example Writing an RDS Exporter instance to a rds file
|
50
|
+
# instance.write("daru_dataframe.rds")
|
51
|
+
def write(path)
  @instance = RSRuby.instance
  @statements = process_statements(@r_variable, @dataframe)

  # The path is embedded in a single-quoted R string literal: escape
  # backslashes and apostrophes so Windows paths and names such as
  # "O'Brien" do not corrupt the generated saveRDS call.
  r_path = path.to_s.gsub(/[\\']/) { |char| "\\#{char}" }
  @statements << "saveRDS(#{@r_variable}, file='#{r_path}')"

  # Evaluate in order; returns the list of evaluated statements.
  @statements.each { |statement| @instance.eval_R(statement) }
end
|
57
|
+
|
58
|
+
private
|
59
|
+
|
60
|
+
def process_statements(r_variable, dataframe)
  # One `name = c(v1, v2, ...)` assignment per vector of the dataframe.
  vector_assignments = dataframe.map_vectors_with_index do |vector, vector_name|
    r_values = vector.to_a.map { |val| convert_datatype(val) }
    "#{vector_name} = c(#{r_values.join(', ')})"
  end

  # Final statement combines the previously assigned vectors into a data.frame.
  vector_names = dataframe.vectors.to_a.map(&:to_s)
  frame_assignment = "#{r_variable} = data.frame(#{vector_names.join(', ')})"

  [*vector_assignments, frame_assignment]
end
|
68
|
+
|
69
|
+
def convert_datatype(value)
  # Translates a single Ruby value into the token interpolated into the
  # generated R `c(...)` call.
  case value
  when nil then 'NA'
  # R spells its boolean literals in upper case; a bare `true`/`false`
  # would be looked up as an (undefined) R variable.
  when true then 'TRUE'
  when false then 'FALSE'
  when String
    # Escape backslashes and apostrophes so the value stays a single,
    # well-formed single-quoted R string literal.
    escaped = value.gsub(/[\\']/) { |char| "\\#{char}" }
    "'#{escaped}'"
  else value
  end
end
|
76
|
+
end
|
77
|
+
end
|
78
|
+
end
|
79
|
+
end
|
@@ -0,0 +1,55 @@
|
|
1
|
+
require 'daru/io/exporters/base'
|
2
|
+
|
3
|
+
module Daru
|
4
|
+
module IO
|
5
|
+
module Exporters
|
6
|
+
# SQL Exporter Class, that extends `to_sql` method to `Daru::DataFrame`
|
7
|
+
# instance variables
|
8
|
+
class SQL < Base
|
9
|
+
Daru::DataFrame.register_io_module :to_sql, self
|
10
|
+
|
11
|
+
# Initializes a SQL Exporter instance.
|
12
|
+
#
|
13
|
+
# @param dataframe [Daru::DataFrame] A dataframe to export.
|
14
|
+
# @param dbh [DBI] A DBI database connection object.
|
15
|
+
# @param table [String] The SQL table to export to.
|
16
|
+
#
|
17
|
+
# @example Initializing with database credentials
|
18
|
+
# df = Daru::DataFrame.new([[1,2],[3,4]], order: [:a, :b])
|
19
|
+
#
|
20
|
+
# #=> #<Daru::DataFrame(2x2)>
|
21
|
+
# # a b
|
22
|
+
# # 0 1 3
|
23
|
+
# # 1 2 4
|
24
|
+
#
|
25
|
+
# table = 'test'
|
26
|
+
#
|
27
|
+
# dbh = DBI.connect("DBI:Mysql:database:localhost", "user", "password")
|
28
|
+
# # Enter the actual SQL database credentials in the above line
|
29
|
+
#
|
30
|
+
# instance = Daru::IO::Exporters::SQL.new(df, dbh, table)
|
31
|
+
def initialize(dataframe, dbh, table)
  # Optional dependencies, declared in the same order as before: the SQLite3
  # DBD driver (which needs an explicit require path), then dbi and sqlite3.
  optional_gem('dbd-sqlite3', requires: 'dbd/SQLite3')
  %w[dbi sqlite3].each { |gem_name| optional_gem gem_name }

  super(dataframe)

  # Connection handle and destination table used by #to.
  @dbh = dbh
  @table = table
end
|
40
|
+
|
41
|
+
# Exports a SQL Exporter instance to an SQL table.
|
42
|
+
#
|
43
|
+
# @example Exports SQL Exporter instance into given SQL table
|
44
|
+
# instance.to
|
45
|
+
def to
  columns = @dataframe.vectors.to_a.join(',')
  placeholders = Array.new(@dataframe.vectors.size, '?').join(',')

  # NOTE(review): table and column names cannot be bound as parameters, so
  # they are interpolated verbatim; they must come from a trusted source.
  # Row values are always passed as bound parameters.
  sth = @dbh.prepare("INSERT INTO #{@table} (#{columns}) VALUES (#{placeholders})")

  begin
    @dataframe.each_row { |row| sth.execute(*row.to_a) }
  ensure
    # Release the prepared statement even when an insert fails, instead of
    # leaving the handle open on the connection.
    sth.finish
  end

  true
end
|
52
|
+
end
|
53
|
+
end
|
54
|
+
end
|
55
|
+
end
|
@@ -0,0 +1 @@
|
|
1
|
+
# Require every importer that sits in the importers/ directory next to this
# file, using its load path relative to `daru/io`.
Dir["#{__dir__}/importers/*.rb"].each do |importer_file|
  relative_path = importer_file.gsub(__dir__, '')
  require "daru/io#{relative_path}"
end
|
@@ -0,0 +1,75 @@
|
|
1
|
+
require 'daru/io/importers/base'
|
2
|
+
|
3
|
+
module Daru
|
4
|
+
module IO
|
5
|
+
module Importers
|
6
|
+
# ActiveRecord Importer Class, that extends `from_activerecord` method to
|
7
|
+
# `Daru::DataFrame`
|
8
|
+
class ActiveRecord < Base
|
9
|
+
Daru::DataFrame.register_io_module :from_activerecord, self
|
10
|
+
|
11
|
+
# Checks for required gem dependencies of ActiveRecord Importer
|
12
|
+
def initialize
  # activerecord is an optional dependency, constrained to the 4.x series
  # and loaded through its `active_record` require path.
  optional_gem('activerecord', '~> 4.0', requires: 'active_record')
end
|
15
|
+
|
16
|
+
# Loads data from a given relation
|
17
|
+
#
|
18
|
+
# @!method self.from(relation)
|
19
|
+
#
|
20
|
+
# @param relation [ActiveRecord::Relation] A relation to be used to load
|
21
|
+
# the contents of DataFrame
|
22
|
+
#
|
23
|
+
# @return [Daru::IO::Importers::ActiveRecord]
|
24
|
+
#
|
25
|
+
# @example Loading from a ActiveRecord instance
|
26
|
+
# instance = Daru::IO::Importers::ActiveRecord.from(Account.all)
|
27
|
+
def from(relation)
  # Remember the relation for #call and return the importer itself so the
  # two calls can be chained.
  tap { @relation = relation }
end
|
31
|
+
|
32
|
+
# Imports a `Daru::DataFrame` from an ActiveRecord Importer instance
|
33
|
+
#
|
34
|
+
# @param fields [String or Array of Strings] A set of fields to load from.
|
35
|
+
#
|
36
|
+
# @return [Daru::DataFrame]
|
37
|
+
#
|
38
|
+
# @example Importing from an instance without specifying fields
|
39
|
+
# instance.call
|
40
|
+
#
|
41
|
+
# #=> #<Daru::DataFrame(2x3)>
|
42
|
+
# #=> id name age
|
43
|
+
# #=> 0 1 Homer 20
|
44
|
+
# #=> 1 2 Marge 30
|
45
|
+
#
|
46
|
+
# @example Importing from an instance with specific fields
|
47
|
+
# instance.call(:id, :name)
|
48
|
+
#
|
49
|
+
# #=> #<Daru::DataFrame(2x2)>
|
50
|
+
# #=> id name
|
51
|
+
# #=> 0 1 Homer
|
52
|
+
# #=> 1 2 Marge
|
53
|
+
def call(*fields)
  @fields = fields

  # No explicit fields: import every attribute of every record.
  if @fields.empty?
    rows = @relation.map { |record| record.attributes.symbolize_keys }
    return Daru::DataFrame.new(rows)
  end

  @fields.map!(&:to_sym)

  # Start from one empty, named vector per requested field ...
  empty_vectors = @fields.each_with_object({}) do |name, vectors|
    vectors[name] = Daru::Vector.new([], name: name)
  end
  frame = Daru::DataFrame.new(empty_vectors, order: @fields)

  # ... then append one row per plucked record. Array() wraps the scalar
  # that pluck yields when a single field is requested.
  @relation.pluck(*@fields).each { |record| frame.add_row(Array(record)) }
  frame.update
  frame
end
|
72
|
+
end
|
73
|
+
end
|
74
|
+
end
|
75
|
+
end
|
@@ -0,0 +1,54 @@
|
|
1
|
+
require 'daru/io/importers/base'
|
2
|
+
|
3
|
+
module Daru
|
4
|
+
module IO
|
5
|
+
module Importers
|
6
|
+
# Avro Importer Class, that extends `read_avro` method to `Daru::DataFrame`
|
7
|
+
class Avro < Base
|
8
|
+
Daru::DataFrame.register_io_module :read_avro, self
|
9
|
+
|
10
|
+
# Checks for required gem dependencies of Avro Importer
|
11
|
+
#
|
12
|
+
# @note The 'snappy' gem handles compressions and is used within Avro gem. Yet, it isn't
|
13
|
+
# specified as a dependency in Avro gem. Hence, it has been added separately.
|
14
|
+
def initialize
  # Both gems are optional dependencies; avro is declared first, then snappy.
  %w[avro snappy].each { |gem_name| optional_gem gem_name }
end
|
18
|
+
|
19
|
+
# Reads data from an avro file
|
20
|
+
#
|
21
|
+
# @!method self.read(path)
|
22
|
+
#
|
23
|
+
# @param path [String] Path to Avro file, where the dataframe is to be imported from.
|
24
|
+
#
|
25
|
+
# @return [Daru::IO::Importers::Avro]
|
26
|
+
#
|
27
|
+
# @example Reading from avro file
|
28
|
+
# instance = Daru::IO::Importers::Avro.read("azorahai.avro")
|
29
|
+
def read(path)
  @path = path

  # Avro container files are binary: read them byte-for-byte. A text-mode
  # read may apply newline translation or stop at an EOF marker on some
  # platforms, and tags the data with a text encoding.
  @buffer = StringIO.new(File.binread(@path))

  # Decode every record eagerly; #call builds the dataframe from @data.
  @data = ::Avro::DataFile::Reader.new(@buffer, ::Avro::IO::DatumReader.new).to_a
  self
end
|
35
|
+
|
36
|
+
# Imports a `Daru::DataFrame` from an Avro Importer instance
|
37
|
+
#
|
38
|
+
# @return [Daru::DataFrame]
|
39
|
+
#
|
40
|
+
# @example Importing from an Avro file
|
41
|
+
# df = instance.call
|
42
|
+
#
|
43
|
+
# #=> #<Daru::DataFrame(3x3)>
|
44
|
+
# # name points winner
|
45
|
+
# # 0 Dany 100 true
|
46
|
+
# # 1 Jon 100 true
|
47
|
+
# # 2 Tyrion 100 true
|
48
|
+
def call
  # @data holds the records decoded by #read.
  records = @data
  Daru::DataFrame.new(records)
end
|
51
|
+
end
|
52
|
+
end
|
53
|
+
end
|
54
|
+
end
|
@@ -0,0 +1,62 @@
|
|
1
|
+
require 'daru/io/base'
|
2
|
+
|
3
|
+
module Daru
|
4
|
+
module IO
|
5
|
+
module Importers
|
6
|
+
# Base Importer Class that contains generic helper methods, to be
|
7
|
+
# used by other Importers via inheritance
|
8
|
+
class Base < Daru::IO::Base
|
9
|
+
# Guesses the `Daru::DataFrame` from the parsed set of key-value pairs.
|
10
|
+
#
|
11
|
+
# @param keys [Array] A set of keys from given key-value pairs
|
12
|
+
# @param vals [Array] A set of values from given key-value pairs
|
13
|
+
#
|
14
|
+
# @example When key-value pairs contains values that is Array of Hashes
|
15
|
+
# Daru::IO::Importers::Base.guess_parse([:a], [[{ x: 1, y: 2 },{ x: 3, y: 4 }]])
|
16
|
+
#
|
17
|
+
# #=> #<Daru::DataFrame(2x2)>
|
18
|
+
# # x y
|
19
|
+
# # 0 1 2
|
20
|
+
# # 1 3 4
|
21
|
+
#
|
22
|
+
# @example When key-value pairs contains values that is Arrays
|
23
|
+
# Daru::IO::Importers::Base.guess_parse([:x, :y], [[1,3], [2,4]])
|
24
|
+
#
|
25
|
+
# #=> #<Daru::DataFrame(2x2)>
|
26
|
+
# # x y
|
27
|
+
# # 0 1 2
|
28
|
+
# # 1 3 4
|
29
|
+
#
|
30
|
+
# @example When key-value pairs contains Array of keys contain value Hashes
|
31
|
+
# Daru::IO::Importers::Base.guess_parse([:a, :b], [{ x: 1, y: 2 }, { x: 3, y: 4 }])
|
32
|
+
#
|
33
|
+
# #=> #<Daru::DataFrame(2x2)>
|
34
|
+
# # x y
|
35
|
+
# # a 1 2
|
36
|
+
# # b 3 4
|
37
|
+
def self.guess_parse(keys, vals)
  first_value = vals.first

  if first_value.is_a?(Array)
    # Arrays of hashes: every hash becomes a row, the keys are not used.
    return Daru::DataFrame.new(vals.flatten) if first_value.first.is_a?(Hash)

    # Plain arrays: each array is a column named by the matching key.
    Daru::DataFrame.rows(vals.transpose, order: keys)
  elsif first_value.is_a?(Hash)
    # Hashes: each hash is a row and the keys become the index.
    Daru::DataFrame.new(vals.flatten, index: keys)
  end
  # Any other shape falls through and yields nil.
end
|
47
|
+
|
48
|
+
# Adds the `from` class method to all inheriting children Importer classes, which
|
49
|
+
# calls corresponding Importer's `initialize` and instance method `from`.
|
50
|
+
def self.from(relation)
  # Build a fresh importer, then delegate to its instance-level #from.
  importer = new
  importer.from(relation)
end
|
53
|
+
|
54
|
+
# Adds the `read` class method to all inheriting children Importer classes, which
|
55
|
+
# calls corresponding Importer's `initialize` and instance method `read`.
|
56
|
+
def self.read(path)
  # Build a fresh importer, then delegate to its instance-level #read.
  importer = new
  importer.read(path)
end
|
59
|
+
end
|
60
|
+
end
|
61
|
+
end
|
62
|
+
end
|
@@ -0,0 +1,190 @@
|
|
1
|
+
require 'daru/io/importers/base'
|
2
|
+
|
3
|
+
module Daru
|
4
|
+
module IO
|
5
|
+
module Importers
|
6
|
+
# CSV Importer Class, that extends `read_csv` method to `Daru::DataFrame`
|
7
|
+
class CSV < Base
|
8
|
+
Daru::DataFrame.register_io_module :read_csv, self
|
9
|
+
|
10
|
+
# Extra field converters, looked up by name alongside the CSV library's own.
CONVERTERS = {
  # Maps the words "true"/"false" (any case, surrounding whitespace ignored)
  # to booleans; every other field is returned untouched. The second
  # parameter is accepted but not used.
  boolean: lambda { |field, _field_info|
    { 'true' => true, 'false' => false }.fetch(field.downcase.strip, field)
  }
}.freeze
|
19
|
+
|
20
|
+
# Checks for required gem dependencies of CSV Importer
|
21
|
+
def initialize
  # Standard-library dependencies, loaded when an importer is instantiated
  # rather than at file load time.
  %w[csv open-uri zlib].each { |library| require library }
end
|
26
|
+
|
27
|
+
# Reads data from a csv / csv.gz file
|
28
|
+
#
|
29
|
+
# @!method self.read(path)
|
30
|
+
#
|
31
|
+
# @param path [String] Path to csv / csv.gz file, where the dataframe is to be imported
|
32
|
+
# from.
|
33
|
+
#
|
34
|
+
# @return [Daru::IO::Importers::CSV]
|
35
|
+
#
|
36
|
+
# @example Reading from csv file
|
37
|
+
# instance = Daru::IO::Importers::CSV.read("matrix_test.csv")
|
38
|
+
#
|
39
|
+
# @example Reading from csv.gz file
|
40
|
+
# instance = Daru::IO::Importers::CSV.read("matrix_test.csv.gz")
|
41
|
+
def read(path)
  @path = path

  # Kernel#open would spawn a subprocess for a path starting with "|", and
  # from Ruby 3.0 on it no longer opens URLs. Dispatch explicitly instead:
  # remote locations go through open-uri's URI#open, everything else is
  # treated strictly as a local file.
  @file_data =
    if path.to_s =~ %r{\A(?:https?|ftp)://}i
      URI.parse(path.to_s).open
    else
      File.open(path)
    end

  self
end
|
46
|
+
|
47
|
+
# Imports a `Daru::DataFrame` from a CSV Importer instance
|
48
|
+
#
|
49
|
+
# @param headers [Boolean] If this option is `true`, only those columns
|
50
|
+
# will be used to import the `Daru::DataFrame` whose header is given.
|
51
|
+
# @param skiprows [Integer] Skips the first `:skiprows` number of rows from
|
52
|
+
# the CSV file. Defaults to 0.
|
53
|
+
# @param compression [Symbol] Defaults to `:infer`, to parse depending on file format
|
54
|
+
# like `.csv.gz`. For explicitly parsing data from a `.csv.gz` file, set
|
55
|
+
# `:compression` as `:gzip`.
|
56
|
+
# @param clone [Boolean] Have a look at `:clone` option
|
57
|
+
# [here](http://www.rubydoc.info/gems/daru/0.1.5/Daru%2FDataFrame:initialize)
|
58
|
+
# @param index [Array or Daru::Index or Daru::MultiIndex] Have a look at
|
59
|
+
# `:index` option
|
60
|
+
# [here](http://www.rubydoc.info/gems/daru/0.1.5/Daru%2FDataFrame:initialize)
|
61
|
+
# @param order [Array or Daru::Index or Daru::MultiIndex] Have a look at
|
62
|
+
# `:order` option
|
63
|
+
# [here](http://www.rubydoc.info/gems/daru/0.1.5/Daru%2FDataFrame:initialize)
|
64
|
+
# @param name [String] Have a look at `:name` option
|
65
|
+
# [here](http://www.rubydoc.info/gems/daru/0.1.5/Daru%2FDataFrame:initialize)
|
66
|
+
# @param options [Hash] CSV standard library options such as `:col_sep`
|
67
|
+
# (defaults to `','`), `:converters` (defaults to `:numeric`),
|
68
|
+
# `:header_converters` (defaults to `:symbol`).
|
69
|
+
#
|
70
|
+
# @return [Daru::DataFrame]
|
71
|
+
#
|
72
|
+
# @example Calling with csv options
|
73
|
+
# df = instance.call(col_sep: ' ', headers: true)
|
74
|
+
#
|
75
|
+
# #=> #<Daru::DataFrame(99x3)>
|
76
|
+
# # image_reso mls true_trans
|
77
|
+
# # 0 6.55779 0 -0.2362347
|
78
|
+
# # 1 2.14746 0 -0.1539447
|
79
|
+
# # 2 8.31104 0 0.3832846,
|
80
|
+
# # 3 3.47872 0 0.3832846,
|
81
|
+
# # 4 4.16725 0 -0.2362347
|
82
|
+
# # 5 5.79983 0 -0.2362347
|
83
|
+
# # 6 1.9058 0 -0.895577,
|
84
|
+
# # 7 1.9058 0 -0.2362347
|
85
|
+
# # 8 4.11806 0 -0.895577,
|
86
|
+
# # 9 6.26622 0 -0.2362347
|
87
|
+
# # 10 2.57805 0 -0.1539447
|
88
|
+
# # 11 4.76151 0 -0.2362347
|
89
|
+
# # 12 7.11002 0 -0.895577,
|
90
|
+
# # 13 5.40811 0 -0.2362347
|
91
|
+
# # 14 8.19567 0 -0.1539447
|
92
|
+
# # ... ... ... ...
|
93
|
+
#
|
94
|
+
# @example Calling with csv.gz options
|
95
|
+
# df = instance.call(compression: :gzip, col_sep: ' ', headers: true)
|
96
|
+
#
|
97
|
+
# #=> #<Daru::DataFrame(99x3)>
|
98
|
+
# # image_reso mls true_trans
|
99
|
+
# # 0 6.55779 0 -0.2362347
|
100
|
+
# # 1 2.14746 0 -0.1539447
|
101
|
+
# # 2 8.31104 0 0.3832846,
|
102
|
+
# # 3 3.47872 0 0.3832846,
|
103
|
+
# # 4 4.16725 0 -0.2362347
|
104
|
+
# # 5 5.79983 0 -0.2362347
|
105
|
+
# # 6 1.9058 0 -0.895577,
|
106
|
+
# # 7 1.9058 0 -0.2362347
|
107
|
+
# # 8 4.11806 0 -0.895577,
|
108
|
+
# # 9 6.26622 0 -0.2362347
|
109
|
+
# # 10 2.57805 0 -0.1539447
|
110
|
+
# # 11 4.76151 0 -0.2362347
|
111
|
+
# # 12 7.11002 0 -0.895577,
|
112
|
+
# # 13 5.40811 0 -0.2362347
|
113
|
+
# # 14 8.19567 0 -0.1539447
|
114
|
+
# # ... ... ... ...
|
115
|
+
def call(headers: nil, skiprows: 0, compression: :infer,
         clone: nil, index: nil, order: nil, name: nil, **options)
  # Store the options on the instance, then decompress the input if needed.
  init_opts(headers: headers, skiprows: skiprows, compression: compression,
            clone: clone, index: index, order: order, name: name, **options)
  process_compression

  columns = @headers ? hash_with_headers : hash_without_headers

  # Without the :headers option the header row is pre-processed to detect
  # and correct repeated names, and those names define the vector order.
  @daru_options[:order] = columns.keys unless @headers

  Daru::DataFrame.new(columns, @daru_options)
end
|
132
|
+
|
133
|
+
private
|
134
|
+
|
135
|
+
def compression?(algorithm, *formats)
  # An explicit request for this algorithm always wins ...
  return true if @compression == algorithm

  # ... otherwise fall back to matching the path against the known suffixes.
  formats.any? { |suffix| @path.end_with?(suffix) }
end
|
138
|
+
|
139
|
+
def hash_with_headers
  # Options are splatted as keywords: passing the hash positionally raises
  # ArgumentError on Ruby 3.0+, where CSV.parse accepts only keyword options.
  table = ::CSV.parse(@file_data, **@options)
  yield table if block_given?

  # Column-wise view: one [header, values] pair per column, with the first
  # @skiprows values dropped. Skipping past the end of a column yields an
  # empty column instead of nil.
  table.by_col.map do |col_name, values|
    [col_name, Array(values)[@skiprows..-1] || []]
  end.to_h
end
|
149
|
+
|
150
|
+
def hash_without_headers
  # Options are splatted as keywords: passing the hash positionally raises
  # ArgumentError on Ruby 3.0+, where CSV.parse accepts only keyword options.
  parsed = ::CSV.parse(@file_data, **@options)
  yield parsed if block_given?
  rows = parsed.to_a

  # The first row supplies the column names; repeated names are recoded by
  # the ArrayHelper. An empty input is treated as having no columns.
  headers = ArrayHelper.recode_repeated(rows.shift || [])

  # Drop the skipped rows, then pivot rows into columns. Skipping past the
  # last row leaves every column empty rather than failing on nil.
  columns = (rows[@skiprows..-1] || []).transpose

  headers
    .each_with_index
    .map { |header, position| [header, columns[position] || []] }
    .to_h
end
|
165
|
+
|
166
|
+
def init_opts(headers: nil, skiprows: 0, compression: :infer,
              clone: nil, index: nil, order: nil, name: nil, **options)
  @headers = headers
  @skiprows = skiprows
  @compression = compression

  # Options forwarded to Daru::DataFrame.new.
  @daru_options = {clone: clone, index: index, order: order, name: name}

  # Options forwarded to the CSV parser; caller-supplied values win.
  @options = {
    col_sep: ',', converters: [:numeric], header_converters: :symbol,
    headers: @headers, skip_blanks: true
  }.merge(options)

  # Array() accepts a single converter (e.g. `converters: :numeric`) as well
  # as a list. Each name is resolved against the CSV library's converters
  # first, then this class's CONVERTERS; anything else (such as a custom
  # lambda) is passed through unchanged. flat_map expands composite
  # converters that resolve to a list of names.
  @options[:converters] = Array(@options[:converters]).flat_map do |converter|
    ::CSV::Converters[converter] || CONVERTERS[converter] || converter
  end
end
|
183
|
+
|
184
|
+
def process_compression
  # Nothing to do unless gzip was requested or the path ends in .csv.gz.
  return unless compression?(:gzip, '.csv.gz')

  # Replace the raw input with its fully decompressed contents.
  @file_data = ::Zlib::GzipReader.new(@file_data).read
end
|
187
|
+
end
|
188
|
+
end
|
189
|
+
end
|
190
|
+
end
|