daru-io 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (44) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +11 -0
  3. data/.rspec +2 -0
  4. data/.rspec_formatter.rb +24 -0
  5. data/.rubocop.yml +109 -0
  6. data/.travis.yml +30 -0
  7. data/.yardopts +2 -0
  8. data/CODE_OF_CONDUCT.md +46 -0
  9. data/CONTRIBUTING.md +65 -0
  10. data/Gemfile +20 -0
  11. data/Guardfile +7 -0
  12. data/LICENSE.md +21 -0
  13. data/README.md +654 -0
  14. data/Rakefile +12 -0
  15. data/daru-io.gemspec +39 -0
  16. data/lib/daru/io.rb +3 -0
  17. data/lib/daru/io/base.rb +45 -0
  18. data/lib/daru/io/exporters.rb +1 -0
  19. data/lib/daru/io/exporters/avro.rb +96 -0
  20. data/lib/daru/io/exporters/base.rb +54 -0
  21. data/lib/daru/io/exporters/csv.rb +103 -0
  22. data/lib/daru/io/exporters/excel.rb +148 -0
  23. data/lib/daru/io/exporters/json.rb +570 -0
  24. data/lib/daru/io/exporters/r_data.rb +66 -0
  25. data/lib/daru/io/exporters/rds.rb +79 -0
  26. data/lib/daru/io/exporters/sql.rb +55 -0
  27. data/lib/daru/io/importers.rb +1 -0
  28. data/lib/daru/io/importers/active_record.rb +75 -0
  29. data/lib/daru/io/importers/avro.rb +54 -0
  30. data/lib/daru/io/importers/base.rb +62 -0
  31. data/lib/daru/io/importers/csv.rb +190 -0
  32. data/lib/daru/io/importers/excel.rb +99 -0
  33. data/lib/daru/io/importers/excelx.rb +138 -0
  34. data/lib/daru/io/importers/html.rb +144 -0
  35. data/lib/daru/io/importers/json.rb +152 -0
  36. data/lib/daru/io/importers/mongo.rb +139 -0
  37. data/lib/daru/io/importers/plaintext.rb +97 -0
  38. data/lib/daru/io/importers/r_data.rb +74 -0
  39. data/lib/daru/io/importers/rds.rb +67 -0
  40. data/lib/daru/io/importers/redis.rb +135 -0
  41. data/lib/daru/io/importers/sql.rb +127 -0
  42. data/lib/daru/io/link.rb +80 -0
  43. data/lib/daru/io/version.rb +5 -0
  44. metadata +269 -0
@@ -0,0 +1,570 @@
1
+ require 'daru/io/exporters/base'
2
+
3
+ module Daru
4
+ module IO
5
+ module Exporters
6
+ # JSON Exporter Class, that extends `to_json`, `to_json_string` and `write_json` methods
7
+ # to `Daru::DataFrame` instances
8
+ class JSON < Base
9
# Register this exporter on Daru::DataFrame under each of its entry points.
%i[to_json to_json_string write_json].each do |io_method|
  Daru::DataFrame.register_io_module io_method, self
end

# Values of the `orient:` option that the exporter accepts.
ORIENT_TYPES = %i[index records split values].freeze
14
+
15
+ # Initializes a JSON Exporter instance.
16
+ #
17
+ # @param dataframe [Daru::DataFrame] A dataframe to export
18
+ # @param orient [Symbol] Setting to export the data in a specific structure.
19
+ # Defaults to `:records`.
20
+ #
21
+ # - `:values` : Returns a 2D array containing the data in the DataFrame.
22
+ # - `:split` : Returns a `Hash`, containing keys `:vectors`, `:index` and `:data`.
23
+ # - `:records` : Returns an Array of Hashes with given JsonPath content.
24
+ # - `:index` : Returns an Array of single-entry Hashes, each with an index value as key
25
+ # and given JsonPath content as value.
26
+ #
27
+ # After choosing an `:orient` option, the JSON content can be manipulated before
28
+ # writing into the JSON file, by providing a block.
29
+ #
30
+ # @param pretty [Boolean] When set to true, the data is pretty-printed to the
31
+ # JSON file.
32
+ # @param jsonpaths [Hash] JsonPaths to export given vectors into a complexly nested
33
+ # JSON structure.
34
+ #
35
+ # @example Initializing a JSON Exporter instance
36
+ # df = Daru::DataFrame.new(
37
+ # [
38
+ # {name: 'Jon Snow', age: 18, sex: 'Male'},
39
+ # {name: 'Rhaegar Targaryen', age: 54, sex: 'Male'},
40
+ # {name: 'Lyanna Stark', age: 36, sex: 'Female'}
41
+ # ],
42
+ # order: %i[name age sex],
43
+ # index: %i[child dad mom]
44
+ # )
45
+ #
46
+ # #=> #<Daru::DataFrame(3x3)>
47
+ # # name age sex
48
+ # # child Jon Snow 18 Male
49
+ # # dad Rhaegar Ta 54 Male
50
+ # # mom Lyanna Sta 36 Female
51
+ #
52
+ # json_exporter = Daru::IO::Exporters::JSON
53
+ #
54
+ # index_instance = json_exporter.new(df, orient: :index, pretty: true)
55
+ # records_instance = json_exporter.new(df,orient: :records, pretty: true)
56
+ # values_instance = json_exporter.new(df, orient: :values, pretty: true)
57
+ # split_instance = json_exporter.new(df, orient: :split, pretty: true)
58
+ # static_jsonpath_instance = json_exporter.new(
59
+ # df, pretty: true, name: '$.specific.name', age: '$.common.age', sex: '$.common.gender'
60
+ # )
61
+ # dynamic_jsonpath_instance = json_exporter.new(
62
+ # df, pretty: true, age: '$.{name}.age', sex: '$.{name}.gender'
63
+ # )
64
+ # block_instance = json_exporter.new(df, orient: :index, pretty: true) do |json|
65
+ # json.map { |j| [j.keys.first, j.values.first] }.to_h
66
+ # end
67
def initialize(dataframe, orient: :records, pretty: false, **jsonpaths, &block)
  # json is stdlib; jsonpath is loaded through the project's optional_gem helper
  require 'json'
  optional_gem 'jsonpath'

  super(dataframe)

  # nil (rather than an empty Hash) signals "no explicit JsonPaths" to #to
  @jsonpath_hash = (jsonpaths unless jsonpaths.empty?)
  @pretty = pretty
  @orient = orient
  @block = block

  validate_params
end
79
+
80
+ # Exports a JSON Exporter instance to a file-writable String.
81
+ #
82
+ # @return [String] A file-writable string
83
+ #
84
+ # @example Getting a file-writable string with default orient: :records
85
+ # records_instance.to_s
86
+ #
87
+ # #=>
88
+ # # [
89
+ # # {
90
+ # # "sex": "Male",
91
+ # # "age": 18,
92
+ # # "name": "Jon Snow"
93
+ # # },
94
+ # # {
95
+ # # "sex": "Male",
96
+ # # "age": 54,
97
+ # # "name": "Rhaegar Targaryen"
98
+ # # },
99
+ # # {
100
+ # # "sex": "Female",
101
+ # # "age": 36,
102
+ # # "name": "Lyanna Stark"
103
+ # # }
104
+ # # ]
105
+ #
106
+ # @example Getting a file-writable string with orient: :index
107
+ # index_instance.to_s
108
+ #
109
+ # #=>
110
+ # # [
111
+ # # {
112
+ # # "child": {
113
+ # # "sex": "Male",
114
+ # # "age": 18,
115
+ # # "name": "Jon Snow"
116
+ # # }
117
+ # # },
118
+ # # {
119
+ # # "dad": {
120
+ # # "sex": "Male",
121
+ # # "age": 54,
122
+ # # "name": "Rhaegar Targaryen"
123
+ # # }
124
+ # # },
125
+ # # {
126
+ # # "mom": {
127
+ # # "sex": "Female",
128
+ # # "age": 36,
129
+ # # "name": "Lyanna Stark"
130
+ # # }
131
+ # # }
132
+ # # ]
133
+ #
134
+ # @example Getting a file-writable string with orient: :values
135
+ # values_instance.to_s
136
+ #
137
+ # #=>
138
+ # # [
139
+ # # [
140
+ # # "Jon Snow",
141
+ # # "Rhaegar Targaryen",
142
+ # # "Lyanna Stark"
143
+ # # ],
144
+ # # [
145
+ # # 18,
146
+ # # 54,
147
+ # # 36
148
+ # # ],
149
+ # # [
150
+ # # "Male",
151
+ # # "Male",
152
+ # # "Female"
153
+ # # ]
154
+ # # ]
155
+ #
156
+ # @example Getting a file-writable string with orient: :split
157
+ # split_instance.to_s
158
+ #
159
+ # #=>
160
+ # # {
161
+ # # "vectors": [
162
+ # # "name",
163
+ # # "age",
164
+ # # "sex"
165
+ # # ],
166
+ # # "index": [
167
+ # # "child",
168
+ # # "dad",
169
+ # # "mom"
170
+ # # ],
171
+ # # "data": [
172
+ # # [
173
+ # # "Jon Snow",
174
+ # # "Rhaegar Targaryen",
175
+ # # "Lyanna Stark"
176
+ # # ],
177
+ # # [
178
+ # # 18,
179
+ # # 54,
180
+ # # 36
181
+ # # ],
182
+ # # [
183
+ # # "Male",
184
+ # # "Male",
185
+ # # "Female"
186
+ # # ]
187
+ # # ]
188
+ # # }
189
+ #
190
+ # @example Getting a file-writable string with static nested JsonPaths
191
+ # static_jsonpath_instance.to_s
192
+ #
193
+ # #=>
194
+ # # [
195
+ # # {
196
+ # # "common": {
197
+ # # "gender": "Male",
198
+ # # "age": 18
199
+ # # },
200
+ # # "specific": {
201
+ # # "name": "Jon Snow"
202
+ # # }
203
+ # # },
204
+ # # {
205
+ # # "common": {
206
+ # # "gender": "Male",
207
+ # # "age": 54
208
+ # # },
209
+ # # "specific": {
210
+ # # "name": "Rhaegar Targaryen"
211
+ # # }
212
+ # # },
213
+ # # {
214
+ # # "common": {
215
+ # # "gender": "Female",
216
+ # # "age": 36
217
+ # # },
218
+ # # "specific": {
219
+ # # "name": "Lyanna Stark"
220
+ # # }
221
+ # # }
222
+ # # ]
223
+ #
224
+ # @example Getting a file-writable string with dynamic JsonPaths
225
+ # dynamic_jsonpath_instance.to_s
226
+ #
227
+ # #=>
228
+ # # [
229
+ # # {
230
+ # # "Jon Snow": {
231
+ # # "gender": "Male",
232
+ # # "age": 18
233
+ # # }
234
+ # # },
235
+ # # {
236
+ # # "Rhaegar Targaryen": {
237
+ # # "gender": "Male",
238
+ # # "age": 54
239
+ # # }
240
+ # # },
241
+ # # {
242
+ # # "Lyanna Stark": {
243
+ # # "gender": "Female",
244
+ # # "age": 36
245
+ # # }
246
+ # # }
247
+ # # ]
248
+ #
249
+ # @example Getting a file-writable string with orient: :index and block
250
+ # block_instance.to_s
251
+ #
252
+ # #=>
253
+ # # {
254
+ # # "child": {
255
+ # # "sex": "Male",
256
+ # # "age": 18,
257
+ # # "name": "Jon Snow"
258
+ # # },
259
+ # # "dad": {
260
+ # # "sex": "Male",
261
+ # # "age": 54,
262
+ # # "name": "Rhaegar Targaryen"
263
+ # # },
264
+ # # "mom": {
265
+ # # "sex": "Female",
266
+ # # "age": 36,
267
+ # # "name": "Lyanna Stark"
268
+ # # }
269
+ # # }
270
# Pure delegation to the parent class implementation.
def to_s
  super()
end
273
+
274
+ # Exports a JSON Exporter instance to a Ruby structure composed of Arrays and Hashes.
275
+ #
276
+ # @return [Array, Hash]
277
+ #
278
+ # @example With default orient: :records
279
+ # records_instance.to
280
+ #
281
+ # #=>
282
+ # # [
283
+ # # {
284
+ # # "sex": "Male",
285
+ # # "age": 18,
286
+ # # "name": "Jon Snow"
287
+ # # },
288
+ # # {
289
+ # # "sex": "Male",
290
+ # # "age": 54,
291
+ # # "name": "Rhaegar Targaryen"
292
+ # # },
293
+ # # {
294
+ # # "sex": "Female",
295
+ # # "age": 36,
296
+ # # "name": "Lyanna Stark"
297
+ # # }
298
+ # # ]
299
+ #
300
+ # @example With orient: :index
301
+ # index_instance.to
302
+ #
303
+ # #=>
304
+ # # [
305
+ # # {
306
+ # # "child": {
307
+ # # "sex": "Male",
308
+ # # "age": 18,
309
+ # # "name": "Jon Snow"
310
+ # # }
311
+ # # },
312
+ # # {
313
+ # # "dad": {
314
+ # # "sex": "Male",
315
+ # # "age": 54,
316
+ # # "name": "Rhaegar Targaryen"
317
+ # # }
318
+ # # },
319
+ # # {
320
+ # # "mom": {
321
+ # # "sex": "Female",
322
+ # # "age": 36,
323
+ # # "name": "Lyanna Stark"
324
+ # # }
325
+ # # }
326
+ # # ]
327
+ #
328
+ # @example With orient: :values
329
+ # values_instance.to
330
+ #
331
+ # #=>
332
+ # # [
333
+ # # [
334
+ # # "Jon Snow",
335
+ # # "Rhaegar Targaryen",
336
+ # # "Lyanna Stark"
337
+ # # ],
338
+ # # [
339
+ # # 18,
340
+ # # 54,
341
+ # # 36
342
+ # # ],
343
+ # # [
344
+ # # "Male",
345
+ # # "Male",
346
+ # # "Female"
347
+ # # ]
348
+ # # ]
349
+ #
350
+ # @example With orient: :split
351
+ # split_instance.to
352
+ #
353
+ # #=>
354
+ # # {
355
+ # # "vectors": [
356
+ # # "name",
357
+ # # "age",
358
+ # # "sex"
359
+ # # ],
360
+ # # "index": [
361
+ # # "child",
362
+ # # "dad",
363
+ # # "mom"
364
+ # # ],
365
+ # # "data": [
366
+ # # [
367
+ # # "Jon Snow",
368
+ # # "Rhaegar Targaryen",
369
+ # # "Lyanna Stark"
370
+ # # ],
371
+ # # [
372
+ # # 18,
373
+ # # 54,
374
+ # # 36
375
+ # # ],
376
+ # # [
377
+ # # "Male",
378
+ # # "Male",
379
+ # # "Female"
380
+ # # ]
381
+ # # ]
382
+ # # }
383
+ #
384
+ # @example With static nested JsonPaths
385
+ # static_jsonpath_instance.to
386
+ #
387
+ # #=>
388
+ # # [
389
+ # # {
390
+ # # "common": {
391
+ # # "gender": "Male",
392
+ # # "age": 18
393
+ # # },
394
+ # # "specific": {
395
+ # # "name": "Jon Snow"
396
+ # # }
397
+ # # },
398
+ # # {
399
+ # # "common": {
400
+ # # "gender": "Male",
401
+ # # "age": 54
402
+ # # },
403
+ # # "specific": {
404
+ # # "name": "Rhaegar Targaryen"
405
+ # # }
406
+ # # },
407
+ # # {
408
+ # # "common": {
409
+ # # "gender": "Female",
410
+ # # "age": 36
411
+ # # },
412
+ # # "specific": {
413
+ # # "name": "Lyanna Stark"
414
+ # # }
415
+ # # }
416
+ # # ]
417
+ #
418
+ # @example With dynamic JsonPaths
419
+ # dynamic_jsonpath_instance.to
420
+ #
421
+ # #=>
422
+ # # [
423
+ # # {
424
+ # # "Jon Snow": {
425
+ # # "gender": "Male",
426
+ # # "age": 18
427
+ # # }
428
+ # # },
429
+ # # {
430
+ # # "Rhaegar Targaryen": {
431
+ # # "gender": "Male",
432
+ # # "age": 54
433
+ # # }
434
+ # # },
435
+ # # {
436
+ # # "Lyanna Stark": {
437
+ # # "gender": "Female",
438
+ # # "age": 36
439
+ # # }
440
+ # # }
441
+ # # ]
442
+ #
443
+ # @example With orient: :index and block
444
+ # block_instance.to
445
+ #
446
+ # #=>
447
+ # # {
448
+ # # "child": {
449
+ # # "sex": "Male",
450
+ # # "age": 18,
451
+ # # "name": "Jon Snow"
452
+ # # },
453
+ # # "dad": {
454
+ # # "sex": "Male",
455
+ # # "age": 54,
456
+ # # "name": "Rhaegar Targaryen"
457
+ # # },
458
+ # # "mom": {
459
+ # # "sex": "Female",
460
+ # # "age": 36,
461
+ # # "name": "Lyanna Stark"
462
+ # # }
463
+ # # }
464
def to
  # Without explicit JsonPaths every vector is exported under its own name at
  # the document root. The mapping is built from pairs + to_h so that a
  # dataframe without vectors yields {} instead of the nil that
  # reduce(:merge) returns for an empty list (which then broke on `.keys`).
  @jsonpath_hash ||= @dataframe.vectors.to_a.map { |vector| [vector, "$.#{vector}"] }.to_h
  @vectors = @jsonpath_hash.keys
  @jsonpaths = process_jsonpath
  @json_content = process_json_content

  # Give the optional user block the last word on the exported structure
  @json_content = @block.call(@json_content) if @block

  @json_content
end
473
+
474
+ # Exports a JSON Exporter instance to a json file.
475
+ #
476
+ # @param path [String] Path of JSON file where the dataframe is to be saved
477
+ #
478
+ # @example Writing a JSON Exporter instance to a JSON file
479
+ # index_instance.write('index.json')
480
+ # split_instance.write('split.json')
481
+ # values_instance.write('values.json')
482
+ # records_instance.write('records.json')
483
+ # static_jsonpath_instance.write('static.json')
484
+ # dynamic_jsonpath_instance.write('dynamic.json')
485
+ # block_instance.write('block.json')
486
def write(path)
  # Serialise before touching the file: if the export raises, an existing
  # file at `path` is left intact instead of being truncated by the open.
  generator = @pretty ? :pretty_generate : :generate
  content = ::JSON.public_send(generator, to)

  File.write(path, content)
end
491
+
492
+ private
493
+
494
# True only when both objects are instances of class_name (or a subclass).
def both_are?(class_name, obj1, obj2)
  [obj1, obj2].all? { |candidate| candidate.is_a?(class_name) }
end
497
+
498
# Recursively merges source into dest and returns the merged value.
#
# - a nil on either side yields the other side
# - two Arrays yield their union, dest elements first
# - two Hashes are merged key by key, mutating and returning dest
# - any other combination yields source
def deep_merge(source, dest)
  if source.nil? || dest.nil?
    return dest.nil? ? source : dest
  end

  return dest | source if both_are?(Array, source, dest)
  return source unless both_are?(Hash, source, dest)

  source.each_with_object(dest) do |(key, incoming), merged|
    existing = merged[key]
    merged[key] = existing ? deep_merge(incoming, existing) : incoming
  end
end
510
+
511
# Resolves one JsonPath segment into the Hash key to use for the current row.
#
# A segment ending in `}` is dynamic: `index}` resolves to the row's index
# value, any other `name}` resolves to the row's value for vector `name`.
# Every other segment is returned unchanged.
#
# @param sub_path [Symbol, Integer] a single segment of a processed JsonPath
# @param idx [Object] index value of the current row
# @param row [#[]] current row, looked up by vector name (as a Symbol)
# @return [Symbol, Integer, Object] the key to use in the exported Hash
def handle_dynamic_keys(sub_path, idx, row)
  segment = sub_path.to_s
  return sub_path unless segment.end_with?('}')

  from_index = segment == 'index}'
  value = from_index ? idx : row[segment.delete('}').to_sym]

  # Row values that are purely numeric strings become Integer keys
  return value.to_i if !from_index && value.is_a?(String) && value.to_i.to_s == value

  # Integers (e.g. a default numeric index or an integer vector) and nil have
  # no #to_sym; use them as-is instead of raising NoMethodError.
  value.respond_to?(:to_sym) ? value.to_sym : value
end
520
+
521
# Walks one processed JsonPath (an Array of segments) down into `hash` and
# stores the row's value for `jsonpath_key` (or the row index when the key
# is :index) at the final segment. Returns `hash`.
def init_hash_rec(jsonpaths, hash, jsonpath_key, row, idx)
  current_key = handle_dynamic_keys(jsonpaths[0], idx, row)

  hash.tap do |node|
    if jsonpaths.count != 1
      # Not at the leaf yet: descend into the child under the current key
      init_hash_rec(jsonpaths[1..-1], node[current_key], jsonpath_key, row, idx)
    elsif jsonpath_key == :index
      node[current_key] = idx
    else
      node[current_key] = row[jsonpath_key]
    end
  end
end
530
+
531
# Builds the exported Hash for a single row: one partial Hash per JsonPath
# (paired positionally with jsonpath_keys), folded together with deep_merge.
# Returns nil when there are no JsonPaths.
def init_hash(jsonpaths, jsonpath_keys, row, idx)
  partials = jsonpaths.each_with_index.map do |path, position|
    # Hash whose missing keys spring into existence as equally auto-creating Hashes
    nested = Hash.new { |hash, key| hash[key] = Hash.new(&hash.default_proc) }
    init_hash_rec(path, nested, jsonpath_keys[position], row, idx)
  end

  partials.inject { |merged, partial| deep_merge(merged, partial) }
end
536
+
537
# Produces the Ruby structure for the configured orient:
# :values  - the dataframe's vectors, each as an Array
# :split   - a Hash with :vectors, :index and :data
# :records - one JsonPath-shaped Hash per row
# otherwise (:index) - one { index value => JsonPath-shaped Hash } per row
def process_json_content
  case @orient
  when :values
    @dataframe.map_vectors(&:to_a)
  when :split
    {
      vectors: @dataframe.vectors.to_a,
      index: @dataframe.index.to_a,
      data: @dataframe.map_vectors(&:to_a)
    }
  when :records
    @dataframe.map_rows_with_index { |row, idx| init_hash(@jsonpaths, @vectors, row, idx) }
  else
    @dataframe.map_rows_with_index do |row, idx|
      {idx => init_hash(@jsonpaths, @vectors, row, idx)}
    end
  end
end
553
+
554
# Turns every configured JsonPath expression into an Array of key segments:
# structural tokens are dropped, quoting/bracket characters are stripped,
# purely numeric segments become Integers and all others become Symbols.
def process_jsonpath
  @jsonpath_hash.values.map do |expression|
    tokens = JsonPath.new(expression).path - %w[$ ${ . .. ..{]

    tokens.map do |token|
      segment = token.delete("'.[]{")
      segment.to_i.to_s == segment ? segment.to_i : segment.to_sym
    end
  end
end
563
+
564
# Rejects any orient that is not listed in ORIENT_TYPES.
#
# @raise [ArgumentError] when @orient is not a supported orient
def validate_params
  return if ORIENT_TYPES.include?(@orient)

  raise ArgumentError, "Invalid orient option '#{@orient}' given."
end
567
+ end
568
+ end
569
+ end
570
+ end