daru-io 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +11 -0
  3. data/.rspec +2 -0
  4. data/.rspec_formatter.rb +24 -0
  5. data/.rubocop.yml +109 -0
  6. data/.travis.yml +30 -0
  7. data/.yardopts +2 -0
  8. data/CODE_OF_CONDUCT.md +46 -0
  9. data/CONTRIBUTING.md +65 -0
  10. data/Gemfile +20 -0
  11. data/Guardfile +7 -0
  12. data/LICENSE.md +21 -0
  13. data/README.md +654 -0
  14. data/Rakefile +12 -0
  15. data/daru-io.gemspec +39 -0
  16. data/lib/daru/io.rb +3 -0
  17. data/lib/daru/io/base.rb +45 -0
  18. data/lib/daru/io/exporters.rb +1 -0
  19. data/lib/daru/io/exporters/avro.rb +96 -0
  20. data/lib/daru/io/exporters/base.rb +54 -0
  21. data/lib/daru/io/exporters/csv.rb +103 -0
  22. data/lib/daru/io/exporters/excel.rb +148 -0
  23. data/lib/daru/io/exporters/json.rb +570 -0
  24. data/lib/daru/io/exporters/r_data.rb +66 -0
  25. data/lib/daru/io/exporters/rds.rb +79 -0
  26. data/lib/daru/io/exporters/sql.rb +55 -0
  27. data/lib/daru/io/importers.rb +1 -0
  28. data/lib/daru/io/importers/active_record.rb +75 -0
  29. data/lib/daru/io/importers/avro.rb +54 -0
  30. data/lib/daru/io/importers/base.rb +62 -0
  31. data/lib/daru/io/importers/csv.rb +190 -0
  32. data/lib/daru/io/importers/excel.rb +99 -0
  33. data/lib/daru/io/importers/excelx.rb +138 -0
  34. data/lib/daru/io/importers/html.rb +144 -0
  35. data/lib/daru/io/importers/json.rb +152 -0
  36. data/lib/daru/io/importers/mongo.rb +139 -0
  37. data/lib/daru/io/importers/plaintext.rb +97 -0
  38. data/lib/daru/io/importers/r_data.rb +74 -0
  39. data/lib/daru/io/importers/rds.rb +67 -0
  40. data/lib/daru/io/importers/redis.rb +135 -0
  41. data/lib/daru/io/importers/sql.rb +127 -0
  42. data/lib/daru/io/link.rb +80 -0
  43. data/lib/daru/io/version.rb +5 -0
  44. metadata +269 -0
@@ -0,0 +1,66 @@
1
+ require 'daru/io/exporters/rds'
2
+
3
+ module Daru
4
+ module IO
5
+ module Exporters
6
+ # RData Exporter Class, that can be used to export multiple `Daru::DataFrame`s
7
+ # to a RData file
8
+ class RData < RDS
9
+ # Initializes a RData Exporter instance.
10
+ #
11
+ # @param options [Hash] A set of key-value pairs wherein the key depicts the name of
12
+ # the R `data.frame` variable name to be saved in the RData file, and the corresponding
13
+ # value depicts the `Daru::DataFrame` (or any Ruby variable in scope)
14
+ #
15
+ # @example Initializing RData Exporter instance
16
+ # df1 = Daru::DataFrame.new([[1,2],[3,4]], order: [:a, :b])
17
+ #
18
+ # #=> #<Daru::DataFrame(2x2)>
19
+ # # a b
20
+ # # 0 1 3
21
+ # # 1 2 4
22
+ #
23
+ # df2 = Daru::DataFrame.new([[5,6],[7,8]], order: [:x, :y])
24
+ #
25
+ # #=> #<Daru::DataFrame(2x2)>
26
+ # # x y
27
+ # # 0 5 7
28
+ # # 1 6 8
29
+ #
30
+ # instance = Daru::IO::Exporters::RData.new("first.df": df1, "second.df": df2)
31
# Stores the R-variable-name => value pairs given as keywords; they are
# only turned into R statements later, by #write.
def initialize(**options)
  optional_gem('rsruby')
  @options = options
end
36
+
37
+ # Exports a RData Exporter instance to a file-writable String.
38
+ #
39
+ # @return [String] A file-writable string
40
+ #
41
+ # @example Getting a file-writable string from RData Exporter instance
42
+ # instance.to_s
43
+ #
44
+ # #=> "\u001F\x8B\b\u0000\u0000\u0000\u0000\u0000\u0000\u0003\vr\x890\xE2\x8A\xE0b```b..."
45
def to_s
  # Delegates unchanged to the parent exporter's #to_s.
  super
end
48
+
49
+ # Exports an RData Exporter instance to a rdata file.
50
+ #
51
+ # @param path [String] Path of RData file where the dataframe(s) is/are to be saved
52
+ #
53
+ # @example Writing to a RData file
54
+ # instance.write("daru_dataframes.RData")
55
# Builds one batch of R statements for every stored variable, appends a
# single `save(...)` call and evaluates them through RSRuby.
#
# @param path [String] destination of the RData file
# @return [Array<String>] the R statements that were evaluated
def write(path)
  # The path ends up inside a single-quoted R string literal, so quotes and
  # backslashes (e.g. Windows separators) must be escaped first.
  r_path = path.to_s.gsub(/[\\']/) { |char| "\\#{char}" }

  @instance = RSRuby.instance
  @statements = @options.flat_map do |r_variable, dataframe|
    process_statements(r_variable, dataframe)
  end
  @statements << "save(#{@options.keys.map(&:to_s).join(', ')}, file='#{r_path}')"
  @statements.each { |statement| @instance.eval_R(statement) }
end
63
+ end
64
+ end
65
+ end
66
+ end
@@ -0,0 +1,79 @@
1
+ require 'daru/io/exporters/base'
2
+
3
+ module Daru
4
+ module IO
5
+ module Exporters
6
+ # RDS Exporter Class, that extends `to_rds_string` and `write_rds` methods to
7
+ # `Daru::DataFrame` instance variables
8
+ class RDS < Base
9
+ Daru::DataFrame.register_io_module :to_rds_string, self
10
+ Daru::DataFrame.register_io_module :write_rds, self
11
+
12
+ # Initializes a RDS Exporter instance.
13
+ #
14
+ # @param dataframe [Daru::DataFrame] A dataframe to export
15
+ # @param r_variable [String] Name of the R `data.frame` variable name to be saved in the RDS file
16
+ #
17
+ # @example Initializing an RDS Exporter
18
+ # df = Daru::DataFrame.new([[1,2],[3,4]], order: [:a, :b])
19
+ #
20
+ # #=> #<Daru::DataFrame(2x2)>
21
+ # # a b
22
+ # # 0 1 3
23
+ # # 1 2 4
24
+ #
25
+ # instance = Daru::IO::Exporters::RDS.new(df, "sample.dataframe")
26
# Keeps the dataframe (via the parent initializer) and the R variable name
# it should be bound to.
def initialize(dataframe, r_variable)
  optional_gem('rsruby')
  super(dataframe)
  @r_variable = r_variable
end
32
+
33
+ # Exports a RDS Exporter instance to a file-writable String.
34
+ #
35
+ # @return [String] A file-writable string
36
+ #
37
+ # @example Getting a file-writable string from RDS Exporter instance
38
+ # instance.to_s #! same as df.to_rds_string("sample.dataframe")
39
+ #
40
+ # #=> "\u001F\x8B\b\u0000\u0000\u0000\u0000\u0000\u0000\u0003\x8B\xE0b```b..."
41
def to_s
  # Delegates unchanged to the parent exporter's #to_s.
  super
end
44
+
45
+ # Exports a RDS Exporter instance to a rds file.
46
+ #
47
+ # @param path [String] Path of RDS file where the dataframe is to be saved
48
+ #
49
+ # @example Writing an RDS Exporter instance to a rds file
50
+ # instance.write("daru_dataframe.rds")
51
# Builds the R statements for the dataframe, appends a `saveRDS(...)` call
# and evaluates them through RSRuby.
#
# @param path [String] destination of the RDS file
# @return [Array<String>] the R statements that were evaluated
def write(path)
  # The path ends up inside a single-quoted R string literal, so quotes and
  # backslashes (e.g. Windows separators) must be escaped first.
  r_path = path.to_s.gsub(/[\\']/) { |char| "\\#{char}" }

  @instance = RSRuby.instance
  @statements = process_statements(@r_variable, @dataframe)
  @statements << "saveRDS(#{@r_variable}, file='#{r_path}')"
  @statements.each { |statement| @instance.eval_R(statement) }
end
57
+
58
+ private
59
+
60
# Returns the R statements that rebuild `dataframe`: one `name = c(...)`
# assignment per vector, followed by the `data.frame(...)` call bound to
# `r_variable`.
def process_statements(r_variable, dataframe)
  column_assignments = dataframe.map_vectors_with_index do |vector, name|
    r_values = vector.to_a.map { |val| convert_datatype(val) }
    "#{name} = c(#{r_values.join(', ')})"
  end
  column_names = dataframe.vectors.to_a.map(&:to_s)

  [*column_assignments, "#{r_variable} = data.frame(#{column_names.join(', ')})"]
end
68
+
69
# Converts a single Ruby value into the text used for it inside an R
# `c(...)` expression.
#
# @param value [Object] a cell value taken from a vector
# @return [String, Object] R literal text for nil, booleans, strings and
#   infinite floats; any other value is returned unchanged and relies on
#   its own string interpolation
def convert_datatype(value)
  case value
  when nil then 'NA'
  # R spells its logical constants in upper case.
  when true then 'TRUE'
  when false then 'FALSE'
  # Escape backslashes and quotes so the value stays one R string literal.
  when String then "'#{value.gsub(/[\\']/) { |char| "\\#{char}" }}'"
  when Float
    # Ruby prints infinities as "Infinity", which R does not understand.
    return value unless value.infinite?

    value > 0 ? 'Inf' : '-Inf'
  else value
  end
end
76
+ end
77
+ end
78
+ end
79
+ end
@@ -0,0 +1,55 @@
1
+ require 'daru/io/exporters/base'
2
+
3
+ module Daru
4
+ module IO
5
+ module Exporters
6
+ # SQL Exporter Class, that extends `to_sql` method to `Daru::DataFrame`
7
+ # instance variables
8
+ class SQL < Base
9
+ Daru::DataFrame.register_io_module :to_sql, self
10
+
11
+ # Initializes a SQL Exporter instance.
12
+ #
13
+ # @param dataframe [Daru::DataFrame] A dataframe to export.
14
+ # @param dbh [DBI] A DBI database connection object.
15
+ # @param table [String] The SQL table to export to.
16
+ #
17
+ # @example Initializing with database credentials
18
+ # df = Daru::DataFrame.new([[1,2],[3,4]], order: [:a, :b])
19
+ #
20
+ # #=> #<Daru::DataFrame(2x2)>
21
+ # # a b
22
+ # # 0 1 3
23
+ # # 1 2 4
24
+ #
25
+ # table = 'test'
26
+ #
27
+ # dbh = DBI.connect("DBI:Mysql:database:localhost", "user", "password")
28
+ # # Enter the actual SQL database credentials in the above line
29
+ #
30
+ # instance = Daru::IO::Exporters::SQL.new(df, dbh, table)
31
# Loads the optional database gems, then keeps the dataframe (via the
# parent initializer), the connection handle and the target table name.
def initialize(dataframe, dbh, table)
  optional_gem('dbd-sqlite3', requires: 'dbd/SQLite3')
  optional_gem('dbi')
  optional_gem('sqlite3')

  super(dataframe)
  @table = table
  @dbh = dbh
end
40
+
41
+ # Exports a SQL Exporter instance to an SQL table.
42
+ #
43
+ # @example Exports SQL Exporter instance into given SQL table
44
+ # instance.to
45
# Inserts every row of the dataframe into the target table through one
# prepared `INSERT` statement.
#
# NOTE: the table and column names are interpolated into the SQL text as-is;
# only the row values are bound as parameters.
#
# @return [true]
def to
  columns = @dataframe.vectors.to_a.join(',')
  placeholders = (['?'] * @dataframe.vectors.size).join(',')
  query = "INSERT INTO #{@table} (#{columns}) VALUES (#{placeholders})"

  sth = @dbh.prepare(query)
  begin
    @dataframe.each_row { |row| sth.execute(*row.to_a) }
  ensure
    # Release the statement handle even when an insert fails.
    sth.finish if sth.respond_to?(:finish)
  end
  true
end
52
+ end
53
+ end
54
+ end
55
+ end
@@ -0,0 +1 @@
1
# Require every importer that lives next to this file, addressed by its
# `daru/io/importers/...` load path.
importer_files = Dir["#{__dir__}/importers/*.rb"]
importer_files.each do |importer_file|
  require "daru/io#{importer_file.gsub(__dir__, '')}"
end
@@ -0,0 +1,75 @@
1
+ require 'daru/io/importers/base'
2
+
3
+ module Daru
4
+ module IO
5
+ module Importers
6
+ # ActiveRecord Importer Class, that extends `from_activerecord` method to
7
+ # `Daru::DataFrame`
8
+ class ActiveRecord < Base
9
+ Daru::DataFrame.register_io_module :from_activerecord, self
10
+
11
+ # Checks for required gem dependencies of ActiveRecord Importer
12
def initialize
  # ActiveRecord is an optional dependency, so it is loaded on first use.
  optional_gem('activerecord', '~> 4.0', requires: 'active_record')
end
15
+
16
+ # Loads data from a given relation
17
+ #
18
+ # @!method self.from(relation)
19
+ #
20
+ # @param relation [ActiveRecord::Relation] A relation to be used to load
21
+ # the contents of DataFrame
22
+ #
23
+ # @return [Daru::IO::Importers::ActiveRecord]
24
+ #
25
+ # @example Loading from a ActiveRecord instance
26
+ # instance = Daru::IO::Importers::ActiveRecord.from(Account.all)
27
# Remembers the relation to import from and returns the importer itself so
# that `#call` can be chained.
def from(relation)
  tap { @relation = relation }
end
31
+
32
+ # Imports a `Daru::DataFrame` from an ActiveRecord Importer instance
33
+ #
34
+ # @param fields [String or Array of Strings] A set of fields to load from.
35
+ #
36
+ # @return [Daru::DataFrame]
37
+ #
38
+ # @example Importing from an instance without specifying fields
39
+ # instance.call
40
+ #
41
+ # #=> #<Daru::DataFrame(2x3)>
42
+ # #=> id name age
43
+ # #=> 0 1 Homer 20
44
+ # #=> 1 2 Marge 30
45
+ #
46
+ # @example Importing from an instance with specific fields
47
+ # instance.call(:id, :name)
48
+ #
49
+ # #=> #<Daru::DataFrame(2x2)>
50
+ # #=> id name
51
+ # #=> 0 1 Homer
52
+ # #=> 1 2 Marge
53
# Builds a dataframe from the stored relation. With no fields given, every
# attribute of every record is used; otherwise only the listed fields are
# plucked, in the order given.
def call(*fields)
  @fields = fields

  if @fields.empty?
    rows = @relation.map { |record| record.attributes.symbolize_keys }
    return Daru::DataFrame.new(rows)
  end

  @fields.map!(&:to_sym)
  empty_vectors = @fields.each_with_object({}) do |name, acc|
    acc[name] = Daru::Vector.new([], name: name)
  end

  frame = Daru::DataFrame.new(empty_vectors, order: @fields)
  # A single-field pluck yields scalars, hence the Array() wrap.
  @relation.pluck(*@fields).each { |record| frame.add_row(Array(record)) }
  frame.update
  frame
end
72
+ end
73
+ end
74
+ end
75
+ end
@@ -0,0 +1,54 @@
1
+ require 'daru/io/importers/base'
2
+
3
+ module Daru
4
+ module IO
5
+ module Importers
6
+ # Avro Importer Class, that extends `read_avro` method to `Daru::DataFrame`
7
+ class Avro < Base
8
+ Daru::DataFrame.register_io_module :read_avro, self
9
+
10
+ # Checks for required gem dependencies of Avro Importer
11
+ #
12
+ # @note The 'snappy' gem handles compressions and is used within Avro gem. Yet, it isn't
13
+ # specified as a dependency in Avro gem. Hence, it has been added separately.
14
def initialize
  # Both gems are optional dependencies and are loaded on first use.
  %w[avro snappy].each { |gem_name| optional_gem(gem_name) }
end
18
+
19
+ # Reads data from an avro file
20
+ #
21
+ # @!method self.read(path)
22
+ #
23
+ # @param path [String] Path to Avro file, where the dataframe is to be imported from.
24
+ #
25
+ # @return [Daru::IO::Importers::Avro]
26
+ #
27
+ # @example Reading from avro file
28
+ # instance = Daru::IO::Importers::Avro.read("azorahai.avro")
29
# Loads the whole Avro file into memory and decodes its records.
#
# @param path [String] path of the Avro file
# @return [self] the importer, so that `#call` can be chained
def read(path)
  @path = path
  # Avro containers are binary: read the raw bytes so that no newline or
  # encoding translation is applied to the buffer.
  @buffer = StringIO.new(File.binread(@path))
  @data = ::Avro::DataFile::Reader.new(@buffer, ::Avro::IO::DatumReader.new).to_a
  self
end
35
+
36
+ # Imports a `Daru::DataFrame` from an Avro Importer instance
37
+ #
38
+ # @return [Daru::DataFrame]
39
+ #
40
+ # @example Importing from an Avro file
41
+ # df = instance.call
42
+ #
43
+ # #=> #<Daru::DataFrame(3x3)>
44
+ # # name points winner
45
+ # # 0 Dany 100 true
46
+ # # 1 Jon 100 true
47
+ # # 2 Tyrion 100 true
48
def call
  # @data holds the records decoded by #read.
  records = @data
  Daru::DataFrame.new(records)
end
51
+ end
52
+ end
53
+ end
54
+ end
@@ -0,0 +1,62 @@
1
+ require 'daru/io/base'
2
+
3
+ module Daru
4
+ module IO
5
+ module Importers
6
+ # Base Importer Class that contains generic helper methods, to be
7
+ # used by other Importers via inheritance
8
+ class Base < Daru::IO::Base
9
+ # Guesses the `Daru::DataFrame` from the parsed set of key-value pairs.
10
+ #
11
+ # @param keys [Array] A set of keys from given key-value pairs
12
+ # @param vals [Array] A set of values from given key-value pairs
13
+ #
14
+ # @example When key-value pairs contains values that is Array of Hashes
15
+ # Daru::IO::Importers::Base.guess_parse([:a], [[{ x: 1, y: 2 },{ x: 3, y: 4 }]])
16
+ #
17
+ # #=> #<Daru::DataFrame(2x2)>
18
+ # # x y
19
+ # # 0 1 2
20
+ # # 1 3 4
21
+ #
22
+ # @example When key-value pairs contains values that is Arrays
23
+ # Daru::IO::Importers::Base.guess_parse([:x, :y], [[1,3], [2,4]])
24
+ #
25
+ # #=> #<Daru::DataFrame(2x2)>
26
+ # # x y
27
+ # # 0 1 2
28
+ # # 1 3 4
29
+ #
30
+ # @example When key-value pairs contains Array of keys contain value Hashes
31
+ # Daru::IO::Importers::Base.guess_parse([:a, :b], [{ x: 1, y: 2 }, { x: 3, y: 4 }])
32
+ #
33
+ # #=> #<Daru::DataFrame(2x2)>
34
+ # # x y
35
+ # # a 1 2
36
+ # # b 3 4
37
# Picks a dataframe construction strategy from the shape of the first value:
# Array of Hashes, plain Arrays (one per key), or Hashes (one per key).
# Returns nil when the first value is neither an Array nor a Hash.
def self.guess_parse(keys, vals)
  sample = vals.first

  if sample.is_a?(Array)
    return Daru::DataFrame.new(vals.flatten) if sample.first.is_a?(Hash)

    Daru::DataFrame.rows(vals.transpose, order: keys)
  elsif sample.is_a?(Hash)
    Daru::DataFrame.new(vals.flatten, index: keys)
  end
end
47
+
48
+ # Adds the `from` class method to all inheriting children Importer classes, which
49
+ # calls corresponding Importer's `initialize` and instance method `from`.
50
def self.from(relation)
  # Instantiate the concrete importer, then hand over to its #from.
  importer = new
  importer.from(relation)
end
53
+
54
+ # Adds the `read` class method to all inheriting children Importer classes, which
55
+ # calls corresponding Importer's `initialize` and instance method `read`.
56
def self.read(path)
  # Instantiate the concrete importer, then hand over to its #read.
  importer = new
  importer.read(path)
end
59
+ end
60
+ end
61
+ end
62
+ end
@@ -0,0 +1,190 @@
1
+ require 'daru/io/importers/base'
2
+
3
+ module Daru
4
+ module IO
5
+ module Importers
6
+ # CSV Importer Class, that extends `read_csv` method to `Daru::DataFrame`
7
+ class CSV < Base
8
+ Daru::DataFrame.register_io_module :read_csv, self
9
+
10
# Extra field converters that can be requested by name in addition to the
# ones the CSV standard library provides.
CONVERTERS = {
  # Maps "true"/"false" (any case, surrounding whitespace ignored) to the
  # Ruby booleans and leaves every other field untouched.
  boolean: lambda do |field, _field_info|
    normalized = field.downcase.strip
    { 'true' => true, 'false' => false }.fetch(normalized, field)
  end
}.freeze
19
+
20
+ # Checks for required gem dependencies of CSV Importer
21
def initialize
  # Standard-library dependencies are loaded lazily, on first use.
  %w[csv open-uri zlib].each { |library| require library }
end
26
+
27
+ # Reads data from a csv / csv.gz file
28
+ #
29
+ # @!method self.read(path)
30
+ #
31
+ # @param path [String] Path to csv / csv.gz file, where the dataframe is to be imported
32
+ # from.
33
+ #
34
+ # @return [Daru::IO::Importers::CSV]
35
+ #
36
+ # @example Reading from csv file
37
+ # instance = Daru::IO::Importers::CSV.read("matrix_test.csv")
38
+ #
39
+ # @example Reading from csv.gz file
40
+ # instance = Daru::IO::Importers::CSV.read("matrix_test.csv.gz")
41
# Opens the CSV source and keeps the open handle for `#call`.
#
# @param path [String] local path or http(s)/ftp URL of a csv / csv.gz file
# @return [self] the importer, so that `#call` can be chained
def read(path)
  @path = path
  # Kernel#open would run a shell command for paths starting with "|" and,
  # since Ruby 3.0, no longer opens URLs. Dispatch explicitly instead:
  # open-uri for remote sources, File.open for everything else.
  @file_data =
    if path.to_s =~ %r{\A(?:https?|ftp)://}i
      URI.parse(path.to_s).open
    else
      File.open(path)
    end
  self
end
46
+
47
+ # Imports a `Daru::DataFrame` from a CSV Importer instance
48
+ #
49
+ # @param headers [Boolean] If this option is `true`, only those columns
50
+ # will be used to import the `Daru::DataFrame` whose header is given.
51
+ # @param skiprows [Integer] Skips the first `:skiprows` number of rows from
52
+ # the CSV file. Defaults to 0.
53
+ # @param compression [Symbol] Defaults to `:infer`, to parse depending on file format
54
+ # like `.csv.gz`. For explicitly parsing data from a `.csv.gz` file, set
55
+ # `:compression` as `:gzip`.
56
+ # @param clone [Boolean] Have a look at `:clone` option
57
+ # [here](http://www.rubydoc.info/gems/daru/0.1.5/Daru%2FDataFrame:initialize)
58
+ # @param index [Array or Daru::Index or Daru::MultiIndex] Have a look at
59
+ # `:index` option
60
+ # [here](http://www.rubydoc.info/gems/daru/0.1.5/Daru%2FDataFrame:initialize)
61
+ # @param order [Array or Daru::Index or Daru::MultiIndex] Have a look at
62
+ # `:order` option
63
+ # [here](http://www.rubydoc.info/gems/daru/0.1.5/Daru%2FDataFrame:initialize)
64
+ # @param name [String] Have a look at `:name` option
65
+ # [here](http://www.rubydoc.info/gems/daru/0.1.5/Daru%2FDataFrame:initialize)
66
+ # @param options [Hash] CSV standard library options such as `:col_sep`
67
+ # (defaults to `','`), `:converters` (defaults to `:numeric`),
68
+ # `:header_converters` (defaults to `:symbol`).
69
+ #
70
+ # @return [Daru::DataFrame]
71
+ #
72
+ # @example Calling with csv options
73
+ # df = instance.call(col_sep: ' ', headers: true)
74
+ #
75
+ # #=> #<Daru::DataFrame(99x3)>
76
+ # # image_reso mls true_trans
77
+ # # 0 6.55779 0 -0.2362347
78
+ # # 1 2.14746 0 -0.1539447
79
+ # # 2 8.31104 0 0.3832846,
80
+ # # 3 3.47872 0 0.3832846,
81
+ # # 4 4.16725 0 -0.2362347
82
+ # # 5 5.79983 0 -0.2362347
83
+ # # 6 1.9058 0 -0.895577,
84
+ # # 7 1.9058 0 -0.2362347
85
+ # # 8 4.11806 0 -0.895577,
86
+ # # 9 6.26622 0 -0.2362347
87
+ # # 10 2.57805 0 -0.1539447
88
+ # # 11 4.76151 0 -0.2362347
89
+ # # 12 7.11002 0 -0.895577,
90
+ # # 13 5.40811 0 -0.2362347
91
+ # # 14 8.19567 0 -0.1539447
92
+ # # ... ... ... ...
93
+ #
94
+ # @example Calling with csv.gz options
95
+ # df = instance.call(compression: :gzip, col_sep: ' ', headers: true)
96
+ #
97
+ # #=> #<Daru::DataFrame(99x3)>
98
+ # # image_reso mls true_trans
99
+ # # 0 6.55779 0 -0.2362347
100
+ # # 1 2.14746 0 -0.1539447
101
+ # # 2 8.31104 0 0.3832846,
102
+ # # 3 3.47872 0 0.3832846,
103
+ # # 4 4.16725 0 -0.2362347
104
+ # # 5 5.79983 0 -0.2362347
105
+ # # 6 1.9058 0 -0.895577,
106
+ # # 7 1.9058 0 -0.2362347
107
+ # # 8 4.11806 0 -0.895577,
108
+ # # 9 6.26622 0 -0.2362347
109
+ # # 10 2.57805 0 -0.1539447
110
+ # # 11 4.76151 0 -0.2362347
111
+ # # 12 7.11002 0 -0.895577,
112
+ # # 13 5.40811 0 -0.2362347
113
+ # # 14 8.19567 0 -0.1539447
114
+ # # ... ... ... ...
115
def call(headers: nil, skiprows: 0, compression: :infer,
         clone: nil, index: nil, order: nil, name: nil, **options)
  init_opts(headers: headers, skiprows: skiprows, compression: compression,
            clone: clone, index: index, order: order, name: name, **options)
  process_compression

  columns = @headers ? hash_with_headers : hash_without_headers
  # Without the :headers option the first row, recoded for repeated names,
  # supplies the column order.
  @daru_options[:order] = columns.keys unless @headers

  Daru::DataFrame.new(columns, @daru_options)
end
132
+
133
+ private
134
+
135
# Tells whether the source should be treated as compressed with `algorithm`:
# either it was requested explicitly, or the path ends with one of `formats`.
#
# @param algorithm [Symbol] compression name to compare with @compression
# @param formats [Array<String>] file-name suffixes that imply the algorithm
# @return [Boolean]
def compression?(algorithm, *formats)
  return true if @compression == algorithm

  # The path may be a Pathname or URI rather than a String.
  file_name = @path.to_s
  formats.any? { |extension| file_name.end_with?(extension) }
end
138
+
139
# Parses the CSV data with its header row and returns a Hash mapping each
# header to that column's values, minus the first @skiprows rows.
#
# Yields the parsed table when a block is given.
def hash_with_headers
  # The options must be splatted: CSV.parse takes keyword arguments and
  # rejects a positional Hash on Ruby 3.
  table = ::CSV.parse(@file_data, **@options)
  yield table if block_given?

  table.by_col.map do |col_name, values|
    # Fall back to an empty column when there are no values or when
    # @skiprows runs past the end of the data.
    [col_name, (values || [])[@skiprows..-1] || []]
  end.to_h
end
149
+
150
# Parses the CSV data as plain rows, uses the first row (recoded for
# repeated names) as headers and returns a Hash mapping each header to that
# column's values, minus the first @skiprows data rows.
#
# Yields the parsed rows when a block is given.
def hash_without_headers
  # The options must be splatted: CSV.parse takes keyword arguments and
  # rejects a positional Hash on Ruby 3.
  parsed = ::CSV.parse(@file_data, **@options)
  yield parsed if block_given?

  rows = parsed.to_a
  # An empty file has no header row at all.
  headers = ArrayHelper.recode_repeated(rows.shift || [])
  # Slicing past the end returns nil; treat that as "no data rows".
  columns = (rows[@skiprows..-1] || []).transpose

  headers.each_with_index.map { |header, i| [header, columns[i] || []] }.to_h
end
165
+
166
# Splits the keyword arguments of #call into importer state (@headers,
# @skiprows, @compression), dataframe options (@daru_options) and CSV
# parser options (@options, user-supplied values override the defaults).
def init_opts(headers: nil, skiprows: 0, compression: :infer,
              clone: nil, index: nil, order: nil, name: nil, **options)
  @headers = headers
  @skiprows = skiprows
  @compression = compression
  @daru_options = {clone: clone, index: index, order: order, name: name}
  @options = {
    col_sep: ',', converters: [:numeric], header_converters: :symbol,
    headers: @headers, skip_blanks: true
  }.merge(options)

  # Accept a single converter (e.g. `converters: :numeric`) as well as a
  # list, then resolve names: standard CSV converters first, the custom
  # CONVERTERS next; anything else (such as a lambda) is kept as given.
  @options[:converters] = Array(@options[:converters]).flat_map do |converter|
    ::CSV::Converters[converter] || CONVERTERS[converter] || converter
  end
end
183
+
184
# Replaces @file_data with its gunzipped contents when the source is
# gzip-compressed; otherwise leaves it untouched.
def process_compression
  return unless compression?(:gzip, '.csv.gz')

  @file_data = ::Zlib::GzipReader.new(@file_data).read
end
187
+ end
188
+ end
189
+ end
190
+ end