daru-io 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +11 -0
- data/.rspec +2 -0
- data/.rspec_formatter.rb +24 -0
- data/.rubocop.yml +109 -0
- data/.travis.yml +30 -0
- data/.yardopts +2 -0
- data/CODE_OF_CONDUCT.md +46 -0
- data/CONTRIBUTING.md +65 -0
- data/Gemfile +20 -0
- data/Guardfile +7 -0
- data/LICENSE.md +21 -0
- data/README.md +654 -0
- data/Rakefile +12 -0
- data/daru-io.gemspec +39 -0
- data/lib/daru/io.rb +3 -0
- data/lib/daru/io/base.rb +45 -0
- data/lib/daru/io/exporters.rb +1 -0
- data/lib/daru/io/exporters/avro.rb +96 -0
- data/lib/daru/io/exporters/base.rb +54 -0
- data/lib/daru/io/exporters/csv.rb +103 -0
- data/lib/daru/io/exporters/excel.rb +148 -0
- data/lib/daru/io/exporters/json.rb +570 -0
- data/lib/daru/io/exporters/r_data.rb +66 -0
- data/lib/daru/io/exporters/rds.rb +79 -0
- data/lib/daru/io/exporters/sql.rb +55 -0
- data/lib/daru/io/importers.rb +1 -0
- data/lib/daru/io/importers/active_record.rb +75 -0
- data/lib/daru/io/importers/avro.rb +54 -0
- data/lib/daru/io/importers/base.rb +62 -0
- data/lib/daru/io/importers/csv.rb +190 -0
- data/lib/daru/io/importers/excel.rb +99 -0
- data/lib/daru/io/importers/excelx.rb +138 -0
- data/lib/daru/io/importers/html.rb +144 -0
- data/lib/daru/io/importers/json.rb +152 -0
- data/lib/daru/io/importers/mongo.rb +139 -0
- data/lib/daru/io/importers/plaintext.rb +97 -0
- data/lib/daru/io/importers/r_data.rb +74 -0
- data/lib/daru/io/importers/rds.rb +67 -0
- data/lib/daru/io/importers/redis.rb +135 -0
- data/lib/daru/io/importers/sql.rb +127 -0
- data/lib/daru/io/link.rb +80 -0
- data/lib/daru/io/version.rb +5 -0
- metadata +269 -0
@@ -0,0 +1,99 @@
|
|
1
|
+
require 'daru/io/importers/base'
|
2
|
+
|
3
|
+
module Daru
|
4
|
+
module IO
|
5
|
+
module Importers
|
6
|
+
# Excel Importer Class, that extends `read_excel` method to `Daru::DataFrame`
|
7
|
+
#
|
8
|
+
# @see Daru::IO::Importers::Excelx For .xlsx format
|
9
|
+
class Excel < Base
|
10
|
+
Daru::DataFrame.register_io_module :read_excel do |*args, &io_block|
  # First positional argument is the file path; everything after it is
  # forwarded to the importer's constructor.
  path          = args.first
  importer_args = args[1..-1]

  importer =
    if path.end_with?('.xlsx')
      # The .xlsx importer is only loaded when it is actually needed.
      require 'daru/io/importers/excelx'
      Daru::IO::Importers::Excelx
    else
      Daru::IO::Importers::Excel
    end

  importer.new(*importer_args, &io_block).read(*path)
end
|
18
|
+
|
19
|
+
# Checks for required gem dependencies of Excel Importer
|
20
|
+
def initialize
  # Hands the `spreadsheet` gem requirement (~> 1.1.1) to `optional_gem`,
  # which is defined outside this file.
  optional_gem('spreadsheet', '~> 1.1.1')
end
|
23
|
+
|
24
|
+
# Reads from an excel (.xls) file
|
25
|
+
#
|
26
|
+
# @!method self.read(path)
|
27
|
+
#
|
28
|
+
# @param path [String] Path of Excel file, where the DataFrame is to be imported from.
|
29
|
+
#
|
30
|
+
# @return [Daru::IO::Importers::Excel]
|
31
|
+
#
|
32
|
+
# @example Reading from an excel file
|
33
|
+
# instance = Daru::IO::Importers::Excel.read("test_xls.xls")
|
34
|
+
def read(path)
  # Stores the opened workbook and returns the importer itself so that
  # `call` can be chained.
  tap { @file_data = Spreadsheet.open(path) }
end
|
38
|
+
|
39
|
+
# Imports a `Daru::DataFrame` from an Excel Importer instance
|
40
|
+
#
|
41
|
+
# @param worksheet_id [Integer] The index of the worksheet in the excel file,
|
42
|
+
# from where the `Daru::DataFrame` will be imported. By default, the first
|
43
|
+
# worksheet has `:worksheet_id` as 0. In general, the n-th worksheet has
|
44
|
+
# its worksheet_id as n-1.
|
45
|
+
#
|
46
|
+
# If worksheet_id option is not given, it is taken as 0 by default and the
|
47
|
+
# `Daru::DataFrame` will be imported from the first worksheet in the excel file.
|
48
|
+
# @param headers [Boolean] Defaults to true. When set to true, first row of the
|
49
|
+
# given worksheet_id is used as the order of the Daru::DataFrame and data of
|
50
|
+
# the Dataframe consists of the remaining rows.
|
51
|
+
#
|
52
|
+
# @return [Daru::DataFrame]
|
53
|
+
#
|
54
|
+
# default_instance = Daru::IO::Importers::Excel.new
|
55
|
+
#
|
56
|
+
# @example Importing from a default worksheet
|
57
|
+
# df = instance.call
|
58
|
+
#
|
59
|
+
# #=> #<Daru::DataFrame(6x5)>
|
60
|
+
# # id name age city a1
|
61
|
+
# # 0 1 Alex 20 New York a,b
|
62
|
+
# # 1 2 Claude 23 London b,c
|
63
|
+
# # 2 3 Peter 25 London a
|
64
|
+
# # 3 4 Franz nil Paris nil
|
65
|
+
# # 4 5 George 5.5 Tome a,b,c
|
66
|
+
# # 5 6 Fernand nil nil nil
|
67
|
+
#
|
68
|
+
# @example Importing from a specific worksheet
|
69
|
+
# df = instance.call(worksheet_id: 0)
|
70
|
+
#
|
71
|
+
# #=> #<Daru::DataFrame(6x5)>
|
72
|
+
# # id name age city a1
|
73
|
+
# # 0 1 Alex 20 New York a,b
|
74
|
+
# # 1 2 Claude 23 London b,c
|
75
|
+
# # 2 3 Peter 25 London a
|
76
|
+
# # 3 4 Franz nil Paris nil
|
77
|
+
# # 4 5 George 5.5 Tome a,b,c
|
78
|
+
# # 5 6 Fernand nil nil nil
|
79
|
+
def call(worksheet_id: 0, headers: true)
  worksheet = @file_data.worksheet(worksheet_id)

  # Keep the boolean flag apart from the computed vector names. Reusing the
  # `headers` variable for the names made it an Array (always truthy), so the
  # first row was removed from every column even with `headers: false`.
  header_row_present = headers ? true : false
  vector_names =
    if header_row_present
      ArrayHelper.recode_repeated(worksheet.row(0)).map(&:to_sym)
    else
      (0...worksheet.row(0).to_a.size).to_a
    end

  df = Daru::DataFrame.new({})
  vector_names.each_with_index do |vector_name, position|
    column = worksheet.column(position).to_a
    # Only drop the leading cell when it really is a header cell.
    column.delete_at(0) if header_row_present
    df[vector_name] = column
  end

  df
end
|
96
|
+
end
|
97
|
+
end
|
98
|
+
end
|
99
|
+
end
|
@@ -0,0 +1,138 @@
|
|
1
|
+
require 'daru/io/importers/base'
|
2
|
+
|
3
|
+
module Daru
|
4
|
+
module IO
|
5
|
+
module Importers
|
6
|
+
# Excelx Importer Class, that handles .xlsx files in the Excel Importer
|
7
|
+
#
|
8
|
+
# @see Daru::IO::Importers::Excel For .xls format
|
9
|
+
class Excelx < Base
|
10
|
+
# Checks for required gem dependencies of Excelx Importer
|
11
|
+
def initialize
  # Hands the `roo` gem requirement (~> 2.7.0) to `optional_gem`,
  # which is defined outside this file.
  optional_gem('roo', '~> 2.7.0')
end
|
14
|
+
|
15
|
+
# Reads from an excelx (xlsx) file
|
16
|
+
#
|
17
|
+
# @!method self.read(path)
|
18
|
+
#
|
19
|
+
# @param path [String] Local / Remote path of xlsx file, where the DataFrame is
|
20
|
+
# to be imported from.
|
21
|
+
#
|
22
|
+
# @return [Daru::IO::Importers::Excelx]
|
23
|
+
#
|
24
|
+
# @example Reading from a local xlsx file
|
25
|
+
# local_instance = Daru::IO::Importers::Excelx.read("Stock-counts-sheet.xlsx")
|
26
|
+
#
|
27
|
+
# @example Reading from a remote xlsx file
|
28
|
+
# url = "https://www.exact.com/uk/images/downloads/getting-started-excel-sheets/Stock-counts-sheet.xlsx"
|
29
|
+
# remote_instance = Daru::IO::Importers::Excelx.read(url)
|
30
|
+
def read(path)
  # Stores the Roo workbook and returns the importer itself so that
  # `call` can be chained.
  tap { @file_data = Roo::Excelx.new(path) }
end
|
34
|
+
|
35
|
+
# Imports a `Daru::DataFrame` from an Excelx Importer instance
|
36
|
+
#
|
37
|
+
# @param sheet [Integer or String] Imports from a specific sheet
|
38
|
+
# @param skiprows [Integer] Skips the first `:skiprows` number of rows from the
|
39
|
+
# sheet being parsed.
|
40
|
+
# @param skipcols [Integer] Skips the first `:skipcols` number of columns from the
|
41
|
+
# sheet being parsed.
|
42
|
+
# @param order [Boolean] Defaults to true. When set to true, first row of the
|
43
|
+
# given sheet is used as the order of the Daru::DataFrame and data of
|
44
|
+
# the Dataframe consists of the remaining rows.
|
45
|
+
# @param index [Boolean] Defaults to false. When set to true, first column of the
|
46
|
+
# given sheet is used as the index of the Daru::DataFrame and data of
|
47
|
+
# the Dataframe consists of the remaining columns.
|
48
|
+
#
|
49
|
+
# When set to false, a default order (0 to n-1) is chosen for the DataFrame,
|
50
|
+
# and the data of the DataFrame consists of all rows in the sheet.
|
51
|
+
#
|
52
|
+
# @return [Daru::DataFrame]
|
53
|
+
#
|
54
|
+
# @example Importing from specific sheet
|
55
|
+
# df = local_instance.call(sheet: 'Example Stock Counts')
|
56
|
+
#
|
57
|
+
# #=> <Daru::DataFrame(15x7)>
|
58
|
+
# # Status Stock coun Item code New Descriptio Stock coun Offset G/L
|
59
|
+
# # 0 H 1 nil nil New stock 2014-08-01 nil
|
60
|
+
# # 1 nil 1 IND300654 2 New stock 2014-08-01 51035
|
61
|
+
# # 2 nil 1 IND43201 5 New stock 2014-08-01 51035
|
62
|
+
# # 3 nil 1 OUT30045 3 New stock 2014-08-01 51035
|
63
|
+
# # ... ... ... ... ... ... ... ...
|
64
|
+
#
|
65
|
+
# @example Importing from a remote URL and default sheet
|
66
|
+
# df = remote_instance.call
|
67
|
+
#
|
68
|
+
# #=> <Daru::DataFrame(15x7)>
|
69
|
+
# # Status Stock coun Item code New Descriptio Stock coun Offset G/L
|
70
|
+
# # 0 H 1 nil nil New stock 2014-08-01 nil
|
71
|
+
# # 1 nil 1 IND300654 2 New stock 2014-08-01 51035
|
72
|
+
# # 2 nil 1 IND43201 5 New stock 2014-08-01 51035
|
73
|
+
# # 3 nil 1 OUT30045 3 New stock 2014-08-01 51035
|
74
|
+
# # ... ... ... ... ... ... ... ...
|
75
|
+
#
|
76
|
+
# @example Importing without headers
|
77
|
+
# df = local_instance.call(sheet: 'Example Stock Counts', order: false)
|
78
|
+
#
|
79
|
+
# #=> <Daru::DataFrame(16x7)>
|
80
|
+
# # 0 1 2 3 4 5 6
|
81
|
+
# # 0 Status Stock coun Item code New Descriptio Stock coun Offset G/L
|
82
|
+
# # 1 H 1 nil nil New stock 2014-08-01 nil
|
83
|
+
# # 2 nil 1 IND300654 2 New stock 2014-08-01 51035
|
84
|
+
# # 3 nil 1 IND43201 5 New stock 2014-08-01 51035
|
85
|
+
# # 4 nil 1 OUT30045 3 New stock 2014-08-01 51035
|
86
|
+
# # ... ... ... ... ... ... ... ...
|
87
|
+
def call(sheet: 0, skiprows: 0, skipcols: 0, order: true, index: false)
  @order    = order
  @index    = index
  worksheet = @file_data.sheet(sheet)
  @data     = strip_html_tags(skip_data(worksheet.to_a, skiprows, skipcols))

  # process_index / process_order return nil when the matching flag is
  # falsy, otherwise the parsed index column / header row.
  @index = process_index
  @order = process_order

  # process_data decides what to strip from the truthiness of @order and
  # @index, so the fallback order must NOT be assigned before it runs.
  # Assigning it first made @order always truthy, and the first row was
  # dropped even with `order: false`.
  @data = process_data

  # The default order is computed from the final data so that its width
  # excludes an index column that has just been removed.
  @order ||= (0..(@data.first || []).length - 1)

  Daru::DataFrame.rows(@data, order: @order, index: @index)
end
|
98
|
+
|
99
|
+
private
|
100
|
+
|
101
|
+
def process_data
  # One leading row is dropped when an order (header row) is in use, and
  # one leading column when an index column is in use.
  leading_rows = @order ? 1 : 0
  leading_cols = @index ? 1 : 0
  return @data if leading_rows.zero? && leading_cols.zero?

  skip_data(@data, leading_rows, leading_cols)
end
|
107
|
+
|
108
|
+
def process_index
  return nil unless @index

  # The index is the first column of the sheet; when a header row is in
  # use, its first cell belongs to the header and is skipped.
  first_column = @data.transpose.first
  @index = @order ? skip_data(first_column, 1) : first_column
end
|
114
|
+
|
115
|
+
def process_order
  return nil unless @order

  # The order is the first row of the sheet; when an index column is in
  # use, its first cell belongs to the index and is skipped.
  header_row = @data.first
  @order = @index ? skip_data(header_row, 1) : header_row
end
|
121
|
+
|
122
|
+
# Drops the first `rows` entries of `data` and, when `cols` is given, the
# first `cols` cells of every remaining row.
#
# Array#[] with a range returns nil when the start lies beyond the end of
# the array. Skipping more rows / columns than exist therefore yields an
# empty array here instead of nil, so callers never receive nil rows.
def skip_data(data, rows, cols=nil)
  remaining = data[rows..-1] || []
  return remaining if cols.nil?

  remaining.map { |row| row[cols..-1] || [] }
end
|
126
|
+
|
127
|
+
def strip_html_tags(data)
  # Removes anything that looks like an HTML tag from String cells;
  # non-String cells are passed through unchanged.
  tag_pattern = /<[^>]+>/
  data.map do |row|
    row.map { |cell| cell.is_a?(String) ? cell.gsub(tag_pattern, '') : cell }
  end
end
|
135
|
+
end
|
136
|
+
end
|
137
|
+
end
|
138
|
+
end
|
@@ -0,0 +1,144 @@
|
|
1
|
+
require 'daru/io/importers/base'
|
2
|
+
|
3
|
+
module Daru
|
4
|
+
module IO
|
5
|
+
module Importers
|
6
|
+
# HTML Importer Class, that extends `read_html` method to `Daru::DataFrame`
|
7
|
+
#
|
8
|
+
# @note
|
9
|
+
# Please note that this module works only for static table elements on a
|
10
|
+
# HTML page, and won't work in cases where the data is being loaded into
|
11
|
+
# the HTML table by inline Javascript.
|
12
|
+
class HTML < Base
|
13
|
+
Daru::DataFrame.register_io_module :read_html, self
|
14
|
+
|
15
|
+
# Checks for required gem dependencies of HTML Importer
|
16
|
+
def initialize
  # open-uri comes from the standard library; the `nokogiri` requirement is
  # handed to `optional_gem`, which is defined outside this file.
  require('open-uri')
  optional_gem('nokogiri')
end
|
20
|
+
|
21
|
+
# Reads from a html file / website
|
22
|
+
#
|
23
|
+
# @!method self.read(path)
|
24
|
+
#
|
25
|
+
# @param path [String] Website URL / path to HTML file, where the
|
26
|
+
# DataFrame is to be imported from.
|
27
|
+
#
|
28
|
+
# @return [Daru::IO::Importers::HTML]
|
29
|
+
#
|
30
|
+
# @example Reading from a website url file
|
31
|
+
# instance = Daru::IO::Importers::HTML.read('http://www.moneycontrol.com/')
|
32
|
+
def read(path)
  # Kernel#open is deliberately avoided: it runs a subprocess for paths
  # starting with "|", stopped handling URLs in Ruby 3.0, and the handle it
  # returned was never closed. URL-looking strings go through open-uri's
  # URI#open, everything else is read as a local file. Both forms close
  # the underlying handle once the content has been read.
  remote = nil
  if path.respond_to?(:to_str) && path.to_str =~ %r{\A[A-Za-z][A-Za-z0-9+\-.]*://}
    remote = URI.parse(path.to_str)
  end
  markup = remote.respond_to?(:open) ? remote.open(&:read) : File.read(path)

  @file_data = Nokogiri.parse(markup)
  self
end
|
36
|
+
|
37
|
+
# Imports Array of `Daru::DataFrame`s from a HTML Importer instance
|
38
|
+
#
|
39
|
+
# @param match [String] A `String` to match and choose a particular table(s)
|
40
|
+
# from multiple tables of a HTML page.
|
41
|
+
# @param index [Array or Daru::Index or Daru::MultiIndex] If given, it
|
42
|
+
# overrides the parsed index. Have a look at `:index` option, at
|
43
|
+
# {http://www.rubydoc.info/gems/daru/0.1.5/Daru%2FDataFrame:initialize
|
44
|
+
# Daru::DataFrame#initialize}
|
45
|
+
# @param order [Array or Daru::Index or Daru::MultiIndex] If given, it
|
46
|
+
# overrides the parsed order. Have a look at `:order` option
|
47
|
+
# [here](http://www.rubydoc.info/gems/daru/0.1.5/Daru%2FDataFrame:initialize)
|
48
|
+
# @param name [String] As `name` of the imported `Daru::DataFrame` isn't
|
49
|
+
# parsed automatically by the module, users can set the name attribute to
|
50
|
+
# their `Daru::DataFrame` manually, through this option.
|
51
|
+
#
|
52
|
+
# See `:name` option
|
53
|
+
# [here](http://www.rubydoc.info/gems/daru/0.1.5/Daru%2FDataFrame:initialize)
|
54
|
+
#
|
55
|
+
# @return [Array<Daru::DataFrame>]
|
56
|
+
#
|
57
|
+
# @example Importing with matching tables
|
58
|
+
# list_of_dfs = instance.call(match: 'Sun Pharma')
|
59
|
+
# list_of_dfs.count
|
60
|
+
# #=> 4
|
61
|
+
#
|
62
|
+
# df = list_of_dfs.first
|
63
|
+
#
|
64
|
+
# # As the website keeps changing everyday, the output might not be exactly
|
65
|
+
# # the same as the one obtained below. Nevertheless, a Daru::DataFrame
|
66
|
+
# # should be obtained (as long as 'Sun Pharma' is there on the website).
|
67
|
+
#
|
68
|
+
# #=> <Daru::DataFrame(5x4)>
|
69
|
+
# # Company Price Change Value (Rs
|
70
|
+
# # 0 Sun Pharma 502.60 -65.05 2,117.87
|
71
|
+
# # 1 Reliance 1356.90 19.60 745.10
|
72
|
+
# # 2 Tech Mahin 379.45 -49.70 650.22
|
73
|
+
# # 3 ITC 315.85 6.75 621.12
|
74
|
+
# # 4 HDFC 1598.85 50.95 553.91
|
75
|
+
def call(match: nil, order: nil, index: nil, name: nil)
  @match   = match
  @options = {name: name, index: index, order: order}

  # Parse every <table>, discarding the ones parse_table rejects (nil).
  parsed = @file_data.search('table').map { |table| parse_table(table) }.compact

  # Keep only tables that fit the user-given dimensions and match text.
  chosen = parsed.select { |table| satisfy_dimension(table) && search(table) }

  # Apply user overrides, then build one DataFrame per surviving table.
  overridden = chosen.map { |table| decide_values(table, @options) }
  overridden.map { |table| table_to_dataframe(table) }
end
|
87
|
+
|
88
|
+
private
|
89
|
+
|
90
|
+
# Allows user to override the scraped order / index / data
|
91
|
+
def decide_values(scraped_val, user_val)
  # Start from the scraped values and layer the user's on top. A falsy
  # user value never replaces a key that was already scraped.
  user_val.each_with_object(scraped_val.dup) do |(key, user), merged|
    merged[key] = merged.key?(key) ? (user || merged[key]) : user
  end
end
|
94
|
+
|
95
|
+
# Splits headers (all th tags) into order and index. Wherein,
|
96
|
+
# Order : All <th> tags on first proper row of HTML table
|
97
|
+
# index : All <th> tags on first proper column of HTML table
|
98
|
+
def parse_hash(headers, size, headers_size)
  # The first row holding `headers_size` <th> cells is the header row.
  pivot      = headers.find_index { |cells| cells.count == headers_size }
  header_row = headers[pivot]

  # Order: the trailing `size` cells of the header row.
  order = header_row[(header_row.count - size)..-1]

  # Index: every <th> found in the rows below the header row, if any.
  below = headers[(pivot + 1)..-1].flatten
  [order, below.to_a.empty? ? nil : below]
end
|
107
|
+
|
108
|
+
# Turns one <table> node into a {data:, index:, order:} Hash, or returns
# nil when the table cannot be mapped onto a DataFrame (no usable header
# row, or an index whose length does not match the data rows).
def parse_table(table)
  headers, headers_size = scrape_tag(table, 'th')
  data, size            = scrape_tag(table, 'td')

  # scrape_tag reports a nil size for a table without any <tr>. Comparing
  # nil sizes below raised NoMethodError and aborted the whole import, so
  # such tables are skipped instead.
  return if headers_size.nil? || size.nil?

  # Only rows with the full number of <td> cells count as data rows.
  data = data.select { |cells| cells.count == size }

  order, indice = parse_hash(headers, size, headers_size) if headers_size >= size
  return if order.nil? || order.empty?
  return unless indice.nil? || indice.count == data.count

  {data: data, index: indice, order: order}
end
|
116
|
+
|
117
|
+
def scrape_tag(table, tag)
  # One array of stripped cell texts per <tr>, plus the largest cell count
  # seen across those rows (nil when there are no rows).
  rows = table.search('tr').map do |row|
    row.search(tag).map { |cell| cell.text.strip }
  end
  [rows, rows.map(&:count).max]
end
|
122
|
+
|
123
|
+
def satisfy_dimension(table)
  # A user-given order / index must match the table's column / row count.
  expected_order = @options[:order]
  expected_index = @options[:index]

  if expected_order && table[:data].first.size != expected_order.size
    false
  elsif expected_index && table[:data].size != expected_index.size
    false
  else
    true
  end
end
|
128
|
+
|
129
|
+
def search(table)
  # Without a match string every table qualifies; otherwise the table's
  # string form has to contain it.
  @match.nil? || table.to_s.include?(@match)
end
|
132
|
+
|
133
|
+
def table_to_dataframe(table)
  # Rows come from :data; :index, :order and :name go in as options.
  frame_options = {
    index: table[:index],
    order: table[:order],
    name: table[:name]
  }
  Daru::DataFrame.rows(table[:data], **frame_options)
end
|
141
|
+
end
|
142
|
+
end
|
143
|
+
end
|
144
|
+
end
|
@@ -0,0 +1,152 @@
|
|
1
|
+
require 'daru/io/importers/base'
|
2
|
+
|
3
|
+
module Daru
|
4
|
+
module IO
|
5
|
+
module Importers
|
6
|
+
# JSON Importer Class, that extends `from_json` and `read_json` methods
|
7
|
+
# to `Daru::DataFrame`
|
8
|
+
class JSON < Base
|
9
|
+
Daru::DataFrame.register_io_module :from_json, self
|
10
|
+
Daru::DataFrame.register_io_module :read_json, self
|
11
|
+
|
12
|
+
# Checks for required gem dependencies of JSON Importer
|
13
|
+
def initialize
  # open-uri and json come from the standard library; the `jsonpath`
  # requirement is handed to `optional_gem`, defined outside this file.
  require('open-uri')
  require('json')
  optional_gem('jsonpath')
end
|
18
|
+
|
19
|
+
# Reads data from a json file / remote json response
|
20
|
+
#
|
21
|
+
# @!method self.read(path)
|
22
|
+
#
|
23
|
+
# @param path [String] Local / Remote path to json file, where the dataframe is to be imported
|
24
|
+
# from.
|
25
|
+
#
|
26
|
+
# @return [Daru::IO::Importers::JSON]
|
27
|
+
#
|
28
|
+
# @example Reading from simply nested remote json response
|
29
|
+
# url = 'https://data.nasa.gov/resource/2vr3-k9wn.json'
|
30
|
+
# simple_read_instance = Daru::IO::Importers::JSON.read(url)
|
31
|
+
#
|
32
|
+
# @example Reading from complexly nested remote json response
|
33
|
+
# url = 'http://api.tvmaze.com/singlesearch/shows?q=game-of-thrones&embed=episodes'
|
34
|
+
# complex_read_instance = Daru::IO::Importers::JSON.read(url)
|
35
|
+
def read(path)
  # Kernel#open is deliberately avoided: it runs a subprocess for paths
  # starting with "|", stopped handling URLs in Ruby 3.0, and the handle it
  # returned was never closed. URL-looking strings go through open-uri's
  # URI#open, everything else is read as a local file. Both forms close
  # the underlying handle once the content has been read.
  remote = nil
  if path.respond_to?(:to_str) && path.to_str =~ %r{\A[A-Za-z][A-Za-z0-9+\-.]*://}
    remote = URI.parse(path.to_str)
  end
  raw = remote.respond_to?(:open) ? remote.open(&:read) : File.read(path)

  @file_data = ::JSON.parse(raw)
  @json      = @file_data
  self
end
|
40
|
+
|
41
|
+
# Loads from a Ruby structure of Hashes and / or Arrays
|
42
|
+
#
|
43
|
+
# @!method self.from(instance)
|
44
|
+
#
|
45
|
+
# @param instance [Hash or Array] A simple / complexly nested JSON structure
|
46
|
+
#
|
47
|
+
# @return [Daru::IO::Importers::JSON]
|
48
|
+
#
|
49
|
+
# @example Loading from Ruby Hash of Arrays
|
50
|
+
# from_instance = Daru::IO::Importers::JSON.from({x: [1,4], y: [2,5], z: [3, 6]})
|
51
|
+
def from(instance)
  # The raw input is kept as-is; a String input is additionally parsed as
  # JSON text, anything else is used directly as the JSON structure.
  @file_data = instance
  @json =
    case instance
    when String then ::JSON.parse(instance)
    else instance
    end
  self
end
|
56
|
+
|
57
|
+
# Imports a `Daru::DataFrame` from a JSON Importer instance
|
58
|
+
#
|
59
|
+
# @param columns [Array] JSON-path selectors to select specific fields
|
60
|
+
# from the JSON input.
|
61
|
+
# @param order [String or Array] Either a JSON-path selector string, or
|
62
|
+
# an array containing the order of the `Daru::DataFrame`.
|
63
|
+
# @param index [String or Array] Either a JSON-path selector string, or
|
64
|
+
# an array containing the index of the `Daru::DataFrame`.
|
65
|
+
# @param named_columns [Hash] JSON-path selectors to select specific fields
|
66
|
+
# from the JSON input.
|
67
|
+
#
|
68
|
+
# @return [Daru::DataFrame]
|
69
|
+
#
|
70
|
+
# @note For more information on using JSON-path selectors, have a look at
|
71
|
+
# the explanations {http://www.rubydoc.info/gems/jsonpath/0.5.8 here}
|
72
|
+
# and {http://goessner.net/articles/JsonPath/ here}.
|
73
|
+
#
|
74
|
+
# @example Importing without jsonpath selectors
|
75
|
+
# df = simple_read_instance.call
|
76
|
+
#
|
77
|
+
# #=> #<Daru::DataFrame(202x10)>
|
78
|
+
# # designation discovery_ h_mag i_deg moid_au orbit_clas period_yr ...
|
79
|
+
# # 0 419880 (20 2011-01-07 19.7 9.65 0.035 Apollo 4.06 ...
|
80
|
+
# # 1 419624 (20 2010-09-17 20.5 14.52 0.028 Apollo 1 ...
|
81
|
+
# # 2 414772 (20 2010-07-28 19 23.11 0.333 Apollo 1.31 ...
|
82
|
+
# # ... ... ... ... ... ... ... ... ...
|
83
|
+
#
|
84
|
+
# @example Importing with jsonpath selectors
|
85
|
+
# df = complex_read_instance.call(
|
86
|
+
# "$.._embedded..episodes..name",
|
87
|
+
# "$.._embedded..episodes..season",
|
88
|
+
# "$.._embedded..episodes..number",
|
89
|
+
# index: (10..70).to_a,
|
90
|
+
# RunTime: "$.._embedded..episodes..runtime"
|
91
|
+
# )
|
92
|
+
#
|
93
|
+
# #=> #<Daru::DataFrame(61x4)>
|
94
|
+
# # name season number RunTime
|
95
|
+
# # 10 Winter is 1 1 60
|
96
|
+
# # 11 The Kingsr 1 2 60
|
97
|
+
# # 12 Lord Snow 1 3 60
|
98
|
+
# # ... ... ... ... ...
|
99
|
+
#
|
100
|
+
# @example Importing from `from` method
|
101
|
+
# df = from_instance.call
|
102
|
+
#
|
103
|
+
# #=> #<Daru::DataFrame(2x3)>
|
104
|
+
# # x y z
|
105
|
+
# # 0 1 2 3
|
106
|
+
# # 1 4 5 6
|
107
|
+
def call(*columns, order: nil, index: nil, **named_columns)
  init_opts(*columns, order: order, index: index, **named_columns)

  @data  = fetch_data
  @index = at_jsonpath(@index)

  # Without an explicit order, positional columns are numbered from 0 and
  # named columns keep their keys.
  positional_names = (0...@columns.count).to_a
  @order = at_jsonpath(@order) || (positional_names + @named_columns.keys)

  Daru::DataFrame.new(@data, order: @order, index: @index)
end
|
116
|
+
|
117
|
+
private
|
118
|
+
|
119
|
+
def at_jsonpath(jsonpath)
  # Only Strings are treated as JSON-path selectors; any other value is
  # returned untouched.
  return jsonpath unless jsonpath.is_a?(String)

  JsonPath.on(@json, jsonpath)
end
|
122
|
+
|
123
|
+
def fetch_data
  selectors = @columns + @named_columns.values

  # No selectors at all: the whole JSON structure is the data.
  return @json if selectors.empty?

  if @columns.size == 1 && @named_columns.empty?
    # A lone unnamed selector is assumed to point at the entire dataset.
    at_jsonpath(@columns.first)
  else
    selectors.map { |selector| at_jsonpath(selector) }
  end
end
|
132
|
+
|
133
|
+
def init_opts(*columns, order: nil, index: nil, **named_columns)
  # Store every option on the instance, then validate the combination.
  @columns, @order, @index, @named_columns = columns, order, index, named_columns

  validate_params
end
|
141
|
+
|
142
|
+
def validate_params
  # An explicit order and named columns cannot be passed in the same call.
  conflicting = !@order.nil? && !@named_columns.empty?
  return unless conflicting

  message = 'Do not pass on order and named columns together, at the same '\
            'function call. Please use only order or only named_columns.'
  raise ArgumentError, message
end
|
149
|
+
end
|
150
|
+
end
|
151
|
+
end
|
152
|
+
end
|