daru-io 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +11 -0
- data/.rspec +2 -0
- data/.rspec_formatter.rb +24 -0
- data/.rubocop.yml +109 -0
- data/.travis.yml +30 -0
- data/.yardopts +2 -0
- data/CODE_OF_CONDUCT.md +46 -0
- data/CONTRIBUTING.md +65 -0
- data/Gemfile +20 -0
- data/Guardfile +7 -0
- data/LICENSE.md +21 -0
- data/README.md +654 -0
- data/Rakefile +12 -0
- data/daru-io.gemspec +39 -0
- data/lib/daru/io.rb +3 -0
- data/lib/daru/io/base.rb +45 -0
- data/lib/daru/io/exporters.rb +1 -0
- data/lib/daru/io/exporters/avro.rb +96 -0
- data/lib/daru/io/exporters/base.rb +54 -0
- data/lib/daru/io/exporters/csv.rb +103 -0
- data/lib/daru/io/exporters/excel.rb +148 -0
- data/lib/daru/io/exporters/json.rb +570 -0
- data/lib/daru/io/exporters/r_data.rb +66 -0
- data/lib/daru/io/exporters/rds.rb +79 -0
- data/lib/daru/io/exporters/sql.rb +55 -0
- data/lib/daru/io/importers.rb +1 -0
- data/lib/daru/io/importers/active_record.rb +75 -0
- data/lib/daru/io/importers/avro.rb +54 -0
- data/lib/daru/io/importers/base.rb +62 -0
- data/lib/daru/io/importers/csv.rb +190 -0
- data/lib/daru/io/importers/excel.rb +99 -0
- data/lib/daru/io/importers/excelx.rb +138 -0
- data/lib/daru/io/importers/html.rb +144 -0
- data/lib/daru/io/importers/json.rb +152 -0
- data/lib/daru/io/importers/mongo.rb +139 -0
- data/lib/daru/io/importers/plaintext.rb +97 -0
- data/lib/daru/io/importers/r_data.rb +74 -0
- data/lib/daru/io/importers/rds.rb +67 -0
- data/lib/daru/io/importers/redis.rb +135 -0
- data/lib/daru/io/importers/sql.rb +127 -0
- data/lib/daru/io/link.rb +80 -0
- data/lib/daru/io/version.rb +5 -0
- metadata +269 -0
@@ -0,0 +1,139 @@
|
|
1
|
+
require 'daru/io/importers/json'
|
2
|
+
|
3
|
+
module Daru
|
4
|
+
module IO
|
5
|
+
module Importers
|
6
|
+
# Mongo Importer Class, that extends `from_mongo` method to `Daru::DataFrame`
|
7
|
+
class Mongo < JSON
|
8
|
+
Daru::DataFrame.register_io_module :from_mongo, self
|
9
|
+
|
10
|
+
# Checks for required gem dependencies of Mongo Importer
|
11
|
+
def initialize
  # Run the parent importer's initializer first, then hand the 'mongo' gem
  # name to `optional_gem` (defined outside this file).
  super()
  optional_gem('mongo')
end
|
15
|
+
|
16
|
+
# Loads data from a given connection
|
17
|
+
#
|
18
|
+
# @!method self.from(connection)
|
19
|
+
#
|
20
|
+
# @param connection [String or Hash or Mongo::Client] Contains details
|
21
|
+
# about a Mongo database / hosts to connect.
|
22
|
+
#
|
23
|
+
# @return [Daru::IO::Importers::Mongo]
|
24
|
+
#
|
25
|
+
# @example Loading from a connection string
|
26
|
+
# instance_1 = Daru::IO::Importers::Mongo.from('mongodb://127.0.0.1:27017/test')
|
27
|
+
#
|
28
|
+
# @example Loading from a connection hash
|
29
|
+
# instance_2 = Daru::IO::Importers::Mongo.from({ hosts: ['127.0.0.1:27017'], database: 'test' })
|
30
|
+
#
|
31
|
+
# @example Loading from a Mongo::Client connection
|
32
|
+
# instance_3 = Daru::IO::Importers::Mongo.from(Mongo::Client.new ['127.0.0.1:27017'], database: 'test')
|
33
|
+
def from(connection)
  # Resolve the argument into a client and remember it; `tap` hands back the
  # importer itself so that `call` can be chained onto the result.
  tap { @client = get_client(connection) }
end
|
37
|
+
|
38
|
+
# Imports a `Daru::DataFrame` from a Mongo Importer instance.
|
39
|
+
#
|
40
|
+
# @param collection [String or Symbol] A specific collection in the
|
41
|
+
# Mongo database, to import as `Daru::DataFrame`.
|
42
|
+
# @param columns [Array] JSON-path selectors to select specific fields
|
43
|
+
# from the JSON input.
|
44
|
+
# @param order [String or Array] Either a JSON-path selector string, or
|
45
|
+
# an array containing the order of the `Daru::DataFrame`. DO NOT
|
46
|
+
# provide both `order` and `named_columns` at the same time.
|
47
|
+
# @param index [String or Array] Either a JSON-path selector string, or
|
48
|
+
# an array containing the index of the `Daru::DataFrame`.
|
49
|
+
# @param filter [Hash] Filters and chooses Mongo documents that match
|
50
|
+
# the given `filter` from the collection.
|
51
|
+
# @param limit [Integer] Limits the number of Mongo documents to be
|
52
|
+
# parsed from the collection.
|
53
|
+
# @param skip [Integer] Skips `skip` number of documents from the Mongo
|
54
|
+
# collection.
|
55
|
+
# @param named_columns [Hash] JSON-path selectors to select specific
|
56
|
+
# fields from the JSON input. DO NOT provide both `order` and
|
57
|
+
# `named_columns` at the same time.
|
58
|
+
#
|
59
|
+
# @note
|
60
|
+
# - For more information on using JSON-path selectors, have a look at
|
61
|
+
# the explanations {http://www.rubydoc.info/gems/jsonpath/0.5.8 here}
|
62
|
+
# and {http://goessner.net/articles/JsonPath/ here}.
|
63
|
+
# - The Mongo gem faces `Argument Error : expected Proc Argument`
|
64
|
+
# issue due to the bug in MRI Ruby 2.4.0 mentioned
|
65
|
+
# {https://bugs.ruby-lang.org/issues/13107 here}. This seems to have
|
66
|
+
# been fixed in Ruby 2.4.1 onwards. Hence, please avoid using this
|
67
|
+
# Mongo Importer in Ruby version 2.4.0.
|
68
|
+
#
|
69
|
+
# @return [Daru::DataFrame]
|
70
|
+
#
|
71
|
+
# @example Importing without jsonpath selectors
|
72
|
+
# # The below 'cars' collection can be recreated in a Mongo shell with -
|
73
|
+
# # db.cars.drop()
|
74
|
+
# # db.cars.insert({name: "Audi", price: 52642})
|
75
|
+
# # db.cars.insert({name: "Mercedes", price: 57127})
|
76
|
+
# # db.cars.insert({name: "Volvo", price: 29000})
|
77
|
+
#
|
78
|
+
# df = instance.call('cars')
|
79
|
+
#
|
80
|
+
# #=> #<Daru::DataFrame(3x3)>
|
81
|
+
# # _id name price
|
82
|
+
# # 0 5948d0bfcd Audi 52642.0
|
83
|
+
# # 1 5948d0c6cd Mercedes 57127.0
|
84
|
+
# # 2 5948d0cecd Volvo 29000.0
|
85
|
+
#
|
86
|
+
# @example Importing with jsonpath selectors
|
87
|
+
# # The below 'cars' collection can be recreated in a Mongo shell with -
|
88
|
+
# # db.cars.drop()
|
89
|
+
# # db.cars.insert({name: "Audi", price: 52642, star: { fuel: 9.8, cost: 8.6, seats: 9.9, sound: 9.3 }})
|
90
|
+
# # db.cars.insert({name: "Mercedes", price: 57127, star: { fuel: 9.3, cost: 8.9, seats: 8.4, sound: 9.1 }})
|
91
|
+
# # db.cars.insert({name: "Volvo", price: 29000, star: { fuel: 7.8, cost: 9.9, seats: 8.2, sound: 8.9 }})
|
92
|
+
#
|
93
|
+
# df = instance.call(
|
94
|
+
# 'cars',
|
95
|
+
# '$.._id',
|
96
|
+
# '$..name',
|
97
|
+
# '$..price',
|
98
|
+
# '$..star..fuel',
|
99
|
+
# '$..star..cost'
|
100
|
+
# )
|
101
|
+
#
|
102
|
+
# #=> #<Daru::DataFrame(3x5)>
|
103
|
+
# # _id name price fuel cost
|
104
|
+
# # 0 5948d40b50 Audi 52642.0 9.8 8.6
|
105
|
+
# # 1 5948d42850 Mercedes 57127.0 9.3 8.9
|
106
|
+
# # 2 5948d44350 Volvo 29000.0 7.8 9.9
|
107
|
+
def call(collection, *columns, order: nil, index: nil,
         filter: nil, limit: nil, skip: nil, **named_columns)
  # Query the collection, then serialise the result with `to_json` and parse
  # it back, so that @json holds plain parsed-JSON values before the parent
  # importer's `call` runs.
  documents = @client[collection.to_sym].find(filter, skip: skip, limit: limit)
  @json = ::JSON.parse(documents.to_json)

  super(*columns, order: order, index: index, **named_columns)
end
|
117
|
+
|
118
|
+
private
|
119
|
+
|
120
|
+
# Builds (or passes through) the Mongo client used by this importer.
#
# @param connection [String, Hash, Mongo::Client] a connection URL, a Hash
#   of client options that carries the hosts under `:hosts`, or a client
#   that has already been created.
# @return [Mongo::Client]
# @raise [ArgumentError] if `connection` is none of the supported types.
def get_client(connection)
  case connection
  when ::Mongo::Client
    connection
  when Hash
    # Work on a copy: deleting :hosts from the caller's Hash would strip the
    # hosts from it (breaking a second `from` call with the same Hash) and
    # would raise on a frozen Hash.
    options = connection.dup
    hosts = options.delete(:hosts)
    ::Mongo::Client.new(hosts, options)
  when String
    ::Mongo::Client.new(connection)
  else
    raise ArgumentError,
          "Expected #{connection} to be either a Mongo instance, "\
          'Mongo connection Hash, or Mongo connection URL String. '\
          "Received #{connection.class} instead."
  end
end
|
136
|
+
end
|
137
|
+
end
|
138
|
+
end
|
139
|
+
end
|
@@ -0,0 +1,97 @@
|
|
1
|
+
require 'daru/io/importers/base'
|
2
|
+
|
3
|
+
module Daru
|
4
|
+
module IO
|
5
|
+
module Importers
|
6
|
+
# Plaintext Importer Class, that extends `read_plaintext` method to
|
7
|
+
# `Daru::DataFrame`
|
8
|
+
class Plaintext < Base
|
9
|
+
Daru::DataFrame.register_io_module :read_plaintext, self
|
10
|
+
|
11
|
+
# No-op initializer: the Plaintext Importer performs no gem dependency checks
|
12
|
+
def initialize
  # Intentionally empty: nothing is loaded or checked here.
end
|
13
|
+
|
14
|
+
# Reads data from a plaintext (.dat) file
|
15
|
+
#
|
16
|
+
# @!method self.read(path)
|
17
|
+
#
|
18
|
+
# @param path [String] Path to plaintext file, where the dataframe is to be
|
19
|
+
# imported from.
|
20
|
+
#
|
21
|
+
# @return [Daru::IO::Importers::Plaintext]
|
22
|
+
#
|
23
|
+
# @example Reading from plaintext file
|
24
|
+
# instance = Daru::IO::Importers::Plaintext.read("bank2.dat")
|
25
|
+
def read(path)
  # Every line is split on whitespace and converted by `process_row`.
  # Rows consisting only of "\x1A" (Ctrl-Z) are dropped afterwards with
  # `reject`: skipping them with `next` inside `map` would leave a nil entry
  # in @file_data for each such line.
  @file_data = File.read(path)
                   .split("\n")
                   .map { |line| process_row(line.strip.split(/\s+/), ['']) }
                   .reject { |row| row == ["\x1A"] }
  self
end
|
33
|
+
|
34
|
+
# Imports `Daru::DataFrame` from a Plaintext Importer instance
|
35
|
+
#
|
36
|
+
# @param fields [Array] An array of vectors.
|
37
|
+
#
|
38
|
+
# @return [Daru::DataFrame]
|
39
|
+
#
|
40
|
+
# @example Initializing with fields
|
41
|
+
# df = instance.call([:v1, :v2, :v3, :v4, :v5, :v6])
|
42
|
+
#
|
43
|
+
# #=> #<Daru::DataFrame(200x6)>
|
44
|
+
# # v1 v2 v3 v4 v5 v6
|
45
|
+
# # 0 214.8 131.0 131.1 9.0 9.7 141.0
|
46
|
+
# # 1 214.6 129.7 129.7 8.1 9.5 141.7
|
47
|
+
# # 2 214.8 129.7 129.7 8.7 9.6 142.2
|
48
|
+
# # 3 214.8 129.7 129.6 7.5 10.4 142.0
|
49
|
+
# # 4 215.0 129.6 129.7 10.4 7.7 141.8
|
50
|
+
# # 5 215.7 130.8 130.5 9.0 10.1 141.4
|
51
|
+
# # 6 215.5 129.5 129.7 7.9 9.6 141.6
|
52
|
+
# # 7 214.5 129.6 129.2 7.2 10.7 141.7
|
53
|
+
# # 8 214.9 129.4 129.7 8.2 11.0 141.9
|
54
|
+
# # 9 215.2 130.4 130.3 9.2 10.0 140.7
|
55
|
+
# # 10 215.3 130.4 130.3 7.9 11.7 141.8
|
56
|
+
# # 11 215.1 129.5 129.6 7.7 10.5 142.2
|
57
|
+
# # 12 215.2 130.8 129.6 7.9 10.8 141.4
|
58
|
+
# # 13 214.7 129.7 129.7 7.7 10.9 141.7
|
59
|
+
# # 14 215.1 129.9 129.7 7.7 10.8 141.8
|
60
|
+
# #... ... ... ... ... ... ...
|
61
|
+
def call(fields)
  # The rows collected by `read` become the frame's rows; `fields` is passed
  # through as the `order:` option.
  rows = @file_data
  Daru::DataFrame.rows(rows, order: fields)
end
|
64
|
+
|
65
|
+
private
|
66
|
+
|
67
|
+
# Whole-token integer: an optional sign followed by digits only.
# \A and \z anchor the entire string; ^ and $ would only anchor a line, so a
# token with an embedded newline could match on its first line alone.
INT_PATTERN   = /\A[-+]?\d+\z/
# Whole-token float: optional sign, digits, an optional ',' or '.' separator,
# optional fraction digits and an optional exponent. The exponent accepts
# either case of 'e' and either sign (e.g. "1.5e+3", "2E5").
FLOAT_PATTERN = /\A[-+]?\d+[,.]?\d*([eE][-+]?\d+)?\z/
|
69
|
+
|
70
|
+
# Converts the raw tokens of one line into cell values.
#
# @param row [#to_a] tokens of a single line
# @param empty [#include?] tokens that stand for a missing value
# @return [Array] one entry per token: nil for tokens listed in `empty`,
#   otherwise whatever `try_string_to_number` returns for the token
def process_row(row, empty)
  row.to_a.map do |token|
    # FIXME: As far as I can guess, the nil branch will never be taken.
    #   This is called only from `read`, where the line is split by `\s+`,
    #   so an "empty" token (currently just '') cannot appear between data.
    empty.include?(token) ? nil : try_string_to_number(token)
  end
end
|
83
|
+
|
84
|
+
# Turns a token into an Integer or Float when it looks like one.
#
# @param s [Object] the token to inspect (normally a String)
# @return [Integer, Float, Object] an Integer for INT_PATTERN matches, a
#   Float for FLOAT_PATTERN matches (a ',' separator is read as '.'), and
#   the argument itself otherwise
def try_string_to_number(s)
  # `===` is used on purpose: like `case`/`when`, it simply answers false
  # for values that are not strings.
  if INT_PATTERN === s
    s.to_i
  elsif FLOAT_PATTERN === s
    s.tr(',', '.').to_f
  else
    s
  end
end
|
94
|
+
end
|
95
|
+
end
|
96
|
+
end
|
97
|
+
end
|
@@ -0,0 +1,74 @@
|
|
1
|
+
require 'daru/io/importers/rds'
|
2
|
+
|
3
|
+
module Daru
|
4
|
+
module IO
|
5
|
+
module Importers
|
6
|
+
# RData Importer Class, that extends `read_rdata` method to `Daru::DataFrame`
|
7
|
+
#
|
8
|
+
# @see Daru::IO::Importers::RDS For .rds format
|
9
|
+
class RData < RDS
|
10
|
+
Daru::DataFrame.register_io_module :read_rdata, self
|
11
|
+
|
12
|
+
# Checks for required gem dependencies of RData Importer
|
13
|
+
def initialize
  # Nothing RData-specific happens here; the parent (RDS) initializer is
  # simply run.
  super()
end
|
16
|
+
|
17
|
+
# Reads data from a Rdata file
|
18
|
+
#
|
19
|
+
# @!method self.read(path)
|
20
|
+
#
|
21
|
+
# @param path [String] Path to RData file, where the dataframe is to be imported from.
|
22
|
+
#
|
23
|
+
# @return [Daru::IO::Importers::RData]
|
24
|
+
#
|
25
|
+
# @example Reading from rdata file
|
26
|
+
# instance = Daru::IO::Importers::RData.read('ACScounty.RData')
|
27
|
+
def read(path)
  # The path is spliced into a single-quoted R string literal, so
  # backslashes and single quotes are escaped first. Unescaped, a path such
  # as "O'Brien/data.RData" would produce invalid (or unintended) R code.
  r_path = path.to_s.gsub(/[\\']/) { |char| "\\#{char}" }

  @instance = RSRuby.instance
  @instance.eval_R("load('#{r_path}')")
  self
end
|
32
|
+
|
33
|
+
# Imports a `Daru::DataFrame` from a RData Importer instance and rdata file
|
34
|
+
#
|
35
|
+
# @param variable [String] The variable to be imported from the
|
36
|
+
# variables stored in the RData file. Please note that the R
|
37
|
+
# variable to be imported from the RData file should be a
|
38
|
+
# `data.frame`
|
39
|
+
#
|
40
|
+
# @return [Daru::DataFrame]
|
41
|
+
#
|
42
|
+
# @example Importing a particular variable
|
43
|
+
# df = instance.call("ACS3")
|
44
|
+
#
|
45
|
+
# #=> #<Daru::DataFrame(1629x30)>
|
46
|
+
# # Abbreviati FIPS Non.US State cnty females.di ...
|
47
|
+
# # 0 AL 1001 14.7 alabama autauga 13.8 ...
|
48
|
+
# # 1 AL 1003 13.5 alabama baldwin 14.1 ...
|
49
|
+
# # 2 AL 1005 20.1 alabama barbour 16.1 ...
|
50
|
+
# # 3 AL 1009 18.0 alabama blount 13.7 ...
|
51
|
+
# # 4 AL 1015 18.6 alabama calhoun 12.9 ...
|
52
|
+
# # ... ... ... ... ... ... ... ...
|
53
|
+
def call(variable)
  @variable = variable.to_s

  validate_params

  # `public_send` instead of `send`: the name comes from the caller, and
  # `send` would reach private methods (e.g. Kernel#system) before the R
  # session object gets a chance to resolve the name itself.
  process_dataframe(@instance.public_send(@variable))
end
|
60
|
+
|
61
|
+
private
|
62
|
+
|
63
|
+
# Ensures that @variable names a data.frame known to the R session.
#
# @return [nil] when @variable is listed among the data.frame variables
# @raise [ArgumentError] when it is not; the message carries the R `typeof`
#   of the variable
def validate_params
  # Array() keeps the check an exact name match even if the listing is not
  # an Array. NOTE(review): RSRuby appears to return a bare String when only
  # one name matches, and possibly nil when none does; confirm against RSRuby.
  data_frame_names = Array(@instance.eval_R('Filter(function(x) is.data.frame(get(x)) , ls())'))
  return if data_frame_names.include?(@variable)

  # Look the variable up by (escaped) name instead of splicing the raw text
  # into R code, so the name is never evaluated as an R expression.
  r_name = @variable.gsub(/[\\']/) { |char| "\\#{char}" }
  variable_type = @instance.eval_R("typeof(get('#{r_name}'))")
  raise ArgumentError, "Expected the given R variable (#{@variable}) to be a data.frame, got a "\
                       "#{variable_type} instead."
end
|
71
|
+
end
|
72
|
+
end
|
73
|
+
end
|
74
|
+
end
|
@@ -0,0 +1,67 @@
|
|
1
|
+
require 'daru/io/importers/base'
|
2
|
+
|
3
|
+
module Daru
|
4
|
+
module IO
|
5
|
+
module Importers
|
6
|
+
# RDS Importer Class, that extends `read_rds` method to `Daru::DataFrame`
|
7
|
+
#
|
8
|
+
# @see Daru::IO::Importers::RData For .Rdata format
|
9
|
+
class RDS < Base
|
10
|
+
Daru::DataFrame.register_io_module :read_rds, self
|
11
|
+
|
12
|
+
# Checks for required gem dependencies of RDS Importer
|
13
|
+
def initialize
  # Hands the 'rsruby' gem name to `optional_gem` (defined outside this file).
  optional_gem('rsruby')
end
|
16
|
+
|
17
|
+
# Reads data from a rds file
|
18
|
+
#
|
19
|
+
# @!method self.read(path)
|
20
|
+
#
|
21
|
+
# @param path [String] Path to rds file, where the dataframe is to be
|
22
|
+
# imported from.
|
23
|
+
#
|
24
|
+
# @return [Daru::IO::Importers::RDS]
|
25
|
+
#
|
26
|
+
# @example Reading from rds file
|
27
|
+
# instance = Daru::IO::Importers::RDS.read('bc_sites.rds')
|
28
|
+
def read(path)
  # The path ends up inside a single-quoted R string literal, so backslashes
  # and single quotes are escaped first; left as they are, they would
  # terminate the literal early or be read as R escape sequences.
  r_path = path.to_s.gsub(/[\\']/) { |char| "\\#{char}" }

  @instance = RSRuby.instance.eval_R("readRDS('#{r_path}')")
  self
end
|
32
|
+
|
33
|
+
# Imports a `Daru::DataFrame` from a RDS Importer instance and rds file
|
34
|
+
#
|
35
|
+
# @return [Daru::DataFrame]
|
36
|
+
#
|
37
|
+
# @example Reading from a RDS file
|
38
|
+
# df = instance.call
|
39
|
+
#
|
40
|
+
# #=> #<Daru::DataFrame(1113x25)>
|
41
|
+
# # area descriptio epa_reach format_ver latitude location location_c ...
|
42
|
+
# # 0 016 GSPTN NaN 4.1 49.5 THOR IS 2MS22016 T ...
|
43
|
+
# # 1 012 CSPT NaN 4.1 50.6167 MITC BY 2MN26012 M ...
|
44
|
+
# # ... ... ... ... ... ... ... ... ...
|
45
|
+
def call
  # Whatever `read` stored in @instance is handed straight to the frame
  # builder.
  loaded = @instance
  process_dataframe(loaded)
end
|
48
|
+
|
49
|
+
private
|
50
|
+
|
51
|
+
# Builds a `Daru::DataFrame` from column data keyed by column name.
#
# @param data [#each] pairs of column name and column values
# @return [Daru::DataFrame] a frame with symbol column names and every value
#   passed through `convert_datatype`
def process_dataframe(data)
  columns = data.each_with_object({}) do |(name, values), converted|
    # Array() lets a column that arrives as a single bare value be treated
    # as a one-element column instead of failing on a missing `map`.
    # NOTE(review): RSRuby appears to convert length-one R vectors to
    # scalars, which would produce such values; confirm against RSRuby.
    converted[name.to_sym] = Array(values).map { |value| convert_datatype(value) }
  end
  Daru::DataFrame.new(columns)
end
|
55
|
+
|
56
|
+
# Normalises a single cell value.
#
# @param value [Object] the raw cell value
# @return [nil, Float, Integer, Object] nil when the value prints as 'NaN';
#   a Float or Integer when the value's text is exactly the text of its
#   float / integer conversion; otherwise the value itself, untouched
def convert_datatype(value)
  text = value.to_s
  return nil if text == 'NaN'

  # Values without numeric conversions (true / false, for instance) must
  # fall through unchanged instead of raising NoMethodError on `to_f`.
  return value.to_f if value.respond_to?(:to_f) && text == value.to_f.to_s
  return value.to_i if value.respond_to?(:to_i) && text == value.to_i.to_s

  value
end
|
64
|
+
end
|
65
|
+
end
|
66
|
+
end
|
67
|
+
end
|
@@ -0,0 +1,135 @@
|
|
1
|
+
require 'daru/io/importers/base'
|
2
|
+
|
3
|
+
module Daru
|
4
|
+
module IO
|
5
|
+
module Importers
|
6
|
+
# Redis Importer Class, that extends `from_redis` method to `Daru::DataFrame`
|
7
|
+
class Redis < Base
|
8
|
+
Daru::DataFrame.register_io_module :from_redis, self
|
9
|
+
|
10
|
+
# Checks for required gem dependencies of Redis Importer
|
11
|
+
def initialize
  require 'json' # `call` parses the stored values with ::JSON
  optional_gem('redis')
end
|
15
|
+
|
16
|
+
# Loads data from a given connection
|
17
|
+
#
|
18
|
+
# @!method self.from(connection)
|
19
|
+
#
|
20
|
+
# @param connection [Hash or Redis Instance] Either a Hash of *Redis* configurations,
|
21
|
+
# or an existing *Redis* instance. For the hash configurations, have a
|
22
|
+
# look at
|
23
|
+
# [Redis#initialize](http://www.rubydoc.info/github/redis/redis-rb/Redis:initialize).
|
24
|
+
#
|
25
|
+
# @return [Daru::IO::Importers::Redis]
|
26
|
+
#
|
27
|
+
# @example Loading from a hash
|
28
|
+
# instance = Daru::IO::Importers::Redis.from({url: "redis://:[password]@[hostname]:[port]/[db]"})
|
29
|
+
#
|
30
|
+
# @example Loading from a Redis connection
|
31
|
+
# instance = Daru::IO::Importers::Redis.from(Redis.new({url: "redis://:[password]@[hostname]:[port]/[db]"}))
|
32
|
+
def from(connection={})
  # Resolve the argument into a client and remember it; `tap` hands back the
  # importer itself so that `call` can be chained onto the result.
  tap { @client = get_client(connection) }
end
|
36
|
+
|
37
|
+
# Imports a `Daru::DataFrame` from a Redis Importer instance
|
38
|
+
#
|
39
|
+
# @param keys [Array] Redis key(s) from which the `Daru::DataFrame`
|
40
|
+
# should be constructed. If no keys are given, all keys in the *Redis*
|
41
|
+
# connection will be used.
|
42
|
+
# @param match [String] A pattern to get matching keys.
|
43
|
+
# @param count [Integer] Number of matching keys to be obtained. Defaults to
|
44
|
+
# nil, to collect ALL matching keys.
|
45
|
+
#
|
46
|
+
# @return [Daru::DataFrame]
|
47
|
+
#
|
48
|
+
# @example Importing with no options
|
49
|
+
# # Say, the Redis connection has this setup
|
50
|
+
# # Key "10001" => { "name" => "Tyrion", "age" => 32 }.to_json
|
51
|
+
# # Key "10002" => { "name" => "Jamie", "age" => 37 }.to_json
|
52
|
+
# # Key "10003" => { "name" => "Cersei", "age" => 37 }.to_json
|
53
|
+
# # Key "10004" => { "name" => "Joffrey", "age" => 19 }.to_json
|
54
|
+
#
|
55
|
+
# df = instance.call
|
56
|
+
#
|
57
|
+
# #=> <Daru::DataFrame(4x2)>
|
58
|
+
# # name age
|
59
|
+
# # 10001 Tyrion 32
|
60
|
+
# # 10002 Jamie 37
|
61
|
+
# # 10003 Cersei 37
|
62
|
+
# # 10004 Joffrey 19
|
63
|
+
#
|
64
|
+
# @example Importing with keys
|
65
|
+
# # Say, the Redis connection has this setup
|
66
|
+
# # Key "10001" => { "name" => "Tyrion", "age" => 32 }.to_json
|
67
|
+
# # Key "10002" => { "name" => "Jamie", "age" => 37 }.to_json
|
68
|
+
# # Key "10003" => { "name" => "Cersei", "age" => 37 }.to_json
|
69
|
+
# # Key "10004" => { "name" => "Joffrey", "age" => 19 }.to_json
|
70
|
+
#
|
71
|
+
# df = instance.call("10001", "10002")
|
72
|
+
#
|
73
|
+
# #=> <Daru::DataFrame(2x2)>
|
74
|
+
# # name age
|
75
|
+
# # 10001 Tyrion 32
|
76
|
+
# # 10002 Jamie 37
|
77
|
+
#
|
78
|
+
# @example Importing with query for matching keys and count
|
79
|
+
# # Say, the Redis connection has this setup
|
80
|
+
# # Key "key:1" => { "name" => "name1", "age" => "age1" }.to_json
|
81
|
+
# # Key "key:2" => { "name" => "name2", "age" => "age2" }.to_json
|
82
|
+
# # Key "key:3" => { "name" => "name3", "age" => "age3" }.to_json
|
83
|
+
# # ...
|
84
|
+
# # Key "key:2000" => { "name" => "name2000", "age" => "age2000" }.to_json
|
85
|
+
#
|
86
|
+
# df = instance.call(match: "key:1*", count: 200)
|
87
|
+
#
|
88
|
+
# #=> #<Daru::DataFrame(200x2)>
|
89
|
+
# # name age
|
90
|
+
# # key:1927 name1927 age1927
|
91
|
+
# # key:1759 name1759 age1759
|
92
|
+
# # key:1703 name1703 age1703
|
93
|
+
# # key:1640 name1640 age1640
|
94
|
+
# # ... ... ...
|
95
|
+
def call(*keys, match: nil, count: nil)
  @match, @count, @keys = match, count, keys
  # Explicit keys win; otherwise `choose_keys` collects them from the
  # connection. The keys are symbolized before being used for lookups.
  @keys = choose_keys(*@keys).map(&:to_sym)

  # Each stored value is parsed as JSON with symbol keys.
  records = @keys.map do |key|
    raw = @client.get(key)
    ::JSON.parse(raw, symbolize_names: true)
  end
  Base.guess_parse(@keys, records)
end
|
104
|
+
|
105
|
+
private
|
106
|
+
|
107
|
+
# Decides which Redis keys to import.
#
# @param keys [Array] keys given explicitly by the caller
# @return [Array] the given keys when there are any; otherwise the unique
#   keys collected with `scan` (filtered by @match), cut down to at most
#   @count entries when @count is set
def choose_keys(*keys)
  return keys.to_a unless keys.empty?

  # A SCAN iteration starts at cursor 0 (not nil) and is finished once the
  # server hands cursor 0 back.
  cursor = 0
  loop do
    # Stop early as soon as the requested number of keys has been gathered.
    break if @count && keys.size >= @count

    cursor, chunk = @client.scan(cursor, match: @match, count: @count)
    # The same key may be reported by more than one page, hence `uniq!`.
    keys.concat(chunk).uniq!
    break if cursor.to_s == '0'
  end

  @count ? keys.first(@count) : keys.dup
end
|
119
|
+
|
120
|
+
# Resolves the `from` argument into a Redis client.
#
# @param connection [Hash, Redis] Redis options or an existing client
# @return [Redis] the given client, or a new one built from the Hash
# @raise [ArgumentError] for any other kind of argument
def get_client(connection)
  return connection if connection.is_a?(::Redis)
  return ::Redis.new(connection) if connection.is_a?(Hash)

  raise ArgumentError, "Expected '#{connection}' to be either "\
                       'a Hash or an initialized Redis instance, '\
                       "but received #{connection.class} instead."
end
|
132
|
+
end
|
133
|
+
end
|
134
|
+
end
|
135
|
+
end
|