daru-io 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +11 -0
  3. data/.rspec +2 -0
  4. data/.rspec_formatter.rb +24 -0
  5. data/.rubocop.yml +109 -0
  6. data/.travis.yml +30 -0
  7. data/.yardopts +2 -0
  8. data/CODE_OF_CONDUCT.md +46 -0
  9. data/CONTRIBUTING.md +65 -0
  10. data/Gemfile +20 -0
  11. data/Guardfile +7 -0
  12. data/LICENSE.md +21 -0
  13. data/README.md +654 -0
  14. data/Rakefile +12 -0
  15. data/daru-io.gemspec +39 -0
  16. data/lib/daru/io.rb +3 -0
  17. data/lib/daru/io/base.rb +45 -0
  18. data/lib/daru/io/exporters.rb +1 -0
  19. data/lib/daru/io/exporters/avro.rb +96 -0
  20. data/lib/daru/io/exporters/base.rb +54 -0
  21. data/lib/daru/io/exporters/csv.rb +103 -0
  22. data/lib/daru/io/exporters/excel.rb +148 -0
  23. data/lib/daru/io/exporters/json.rb +570 -0
  24. data/lib/daru/io/exporters/r_data.rb +66 -0
  25. data/lib/daru/io/exporters/rds.rb +79 -0
  26. data/lib/daru/io/exporters/sql.rb +55 -0
  27. data/lib/daru/io/importers.rb +1 -0
  28. data/lib/daru/io/importers/active_record.rb +75 -0
  29. data/lib/daru/io/importers/avro.rb +54 -0
  30. data/lib/daru/io/importers/base.rb +62 -0
  31. data/lib/daru/io/importers/csv.rb +190 -0
  32. data/lib/daru/io/importers/excel.rb +99 -0
  33. data/lib/daru/io/importers/excelx.rb +138 -0
  34. data/lib/daru/io/importers/html.rb +144 -0
  35. data/lib/daru/io/importers/json.rb +152 -0
  36. data/lib/daru/io/importers/mongo.rb +139 -0
  37. data/lib/daru/io/importers/plaintext.rb +97 -0
  38. data/lib/daru/io/importers/r_data.rb +74 -0
  39. data/lib/daru/io/importers/rds.rb +67 -0
  40. data/lib/daru/io/importers/redis.rb +135 -0
  41. data/lib/daru/io/importers/sql.rb +127 -0
  42. data/lib/daru/io/link.rb +80 -0
  43. data/lib/daru/io/version.rb +5 -0
  44. metadata +269 -0
@@ -0,0 +1,570 @@
1
+ require 'daru/io/exporters/base'
2
+
3
+ module Daru
4
+ module IO
5
+ module Exporters
6
+ # JSON Exporter Class, which adds the `to_json`, `to_json_string` and `write_json` methods
7
+ # to `Daru::DataFrame` instances
8
+ class JSON < Base
9
+ Daru::DataFrame.register_io_module :to_json, self
10
+ Daru::DataFrame.register_io_module :to_json_string, self
11
+ Daru::DataFrame.register_io_module :write_json, self
12
+
13
+ ORIENT_TYPES = %i[index records split values].freeze
14
+
15
+ # Initializes a JSON Exporter instance.
16
+ #
17
+ # @param dataframe [Daru::DataFrame] A dataframe to export
18
+ # @param orient [Symbol] Setting to export the data in a specific structure.
19
+ # Defaults to `:records`.
20
+ #
21
+ # - `:values` : Returns a 2D array containing the data in the DataFrame.
22
+ # - `:split` : Returns a `Hash`, containing keys `:vectors`, `:index` and `:data`.
23
+ # - `:records` : Returns an Array of Hashes with given JsonPath content.
24
+ # - `:index` : Returns an Array of single-entry Hashes, each with an index value as key
25
+ # and the given JsonPath content as value.
26
+ #
27
+ # After choosing an `:orient` option, the JSON content can be manipulated before
28
+ # writing into the JSON file, by providing a block.
29
+ #
30
+ # @param pretty [Boolean] When set to true, the data is pretty-printed to the
31
+ # JSON file.
32
+ # @param jsonpaths [Hash] JsonPaths to export given vectors into a complexly nested
33
+ # JSON structure.
34
+ #
35
+ # @example Initializing a JSON Exporter instance
36
+ # df = Daru::DataFrame.new(
37
+ # [
38
+ # {name: 'Jon Snow', age: 18, sex: 'Male'},
39
+ # {name: 'Rhaegar Targaryen', age: 54, sex: 'Male'},
40
+ # {name: 'Lyanna Stark', age: 36, sex: 'Female'}
41
+ # ],
42
+ # order: %i[name age sex],
43
+ # index: %i[child dad mom]
44
+ # )
45
+ #
46
+ # #=> #<Daru::DataFrame(3x3)>
47
+ # # name age sex
48
+ # # child Jon Snow 18 Male
49
+ # # dad Rhaegar Ta 54 Male
50
+ # # mom Lyanna Sta 36 Female
51
+ #
52
+ # json_exporter = Daru::IO::Exporters::JSON
53
+ #
54
+ # index_instance = json_exporter.new(df, orient: :index, pretty: true)
55
+ # records_instance = json_exporter.new(df,orient: :records, pretty: true)
56
+ # values_instance = json_exporter.new(df, orient: :values, pretty: true)
57
+ # split_instance = json_exporter.new(df, orient: :split, pretty: true)
58
+ # static_jsonpath_instance = json_exporter.new(
59
+ # df, pretty: true, name: '$.specific.name', age: '$.common.age', sex: '$.common.gender'
60
+ # )
61
+ # dynamic_jsonpath_instance = json_exporter.new(
62
+ # df, pretty: true, age: '$.{name}.age', sex: '$.{name}.gender'
63
+ # )
64
+ # block_instance = json_exporter.new(df, orient: :index, pretty: true) do |json|
65
+ # json.map { |j| [j.keys.first, j.values.first] }.to_h
66
+ # end
67
def initialize(dataframe, orient: :records, pretty: false, **jsonpaths, &block)
  # Dependencies are loaded lazily, only once an exporter is actually built.
  require 'json'
  optional_gem 'jsonpath'

  super(dataframe)
  @block, @orient, @pretty = block, orient, pretty
  # nil (rather than an empty Hash) marks "no custom JsonPaths were given".
  @jsonpath_hash = (jsonpaths unless jsonpaths.empty?)

  validate_params
end
79
+
80
+ # Exports a JSON Exporter instance to a file-writable String.
81
+ #
82
+ # @return [String] A file-writable string
83
+ #
84
+ # @example Getting a file-writable string with default orient: :records
85
+ # records_instance.to_s
86
+ #
87
+ # #=>
88
+ # # [
89
+ # # {
90
+ # # "sex": "Male",
91
+ # # "age": 18,
92
+ # # "name": "Jon Snow"
93
+ # # },
94
+ # # {
95
+ # # "sex": "Male",
96
+ # # "age": 54,
97
+ # # "name": "Rhaegar Targaryen"
98
+ # # },
99
+ # # {
100
+ # # "sex": "Female",
101
+ # # "age": 36,
102
+ # # "name": "Lyanna Stark"
103
+ # # }
104
+ # # ]
105
+ #
106
+ # @example Getting a file-writable string with orient: :index
107
+ # index_instance.to_s
108
+ #
109
+ # #=>
110
+ # # [
111
+ # # {
112
+ # # "child": {
113
+ # # "sex": "Male",
114
+ # # "age": 18,
115
+ # # "name": "Jon Snow"
116
+ # # }
117
+ # # },
118
+ # # {
119
+ # # "dad": {
120
+ # # "sex": "Male",
121
+ # # "age": 54,
122
+ # # "name": "Rhaegar Targaryen"
123
+ # # }
124
+ # # },
125
+ # # {
126
+ # # "mom": {
127
+ # # "sex": "Female",
128
+ # # "age": 36,
129
+ # # "name": "Lyanna Stark"
130
+ # # }
131
+ # # }
132
+ # # ]
133
+ #
134
+ # @example Getting a file-writable string with orient: :values
135
+ # values_instance.to_s
136
+ #
137
+ # #=>
138
+ # # [
139
+ # # [
140
+ # # "Jon Snow",
141
+ # # "Rhaegar Targaryen",
142
+ # # "Lyanna Stark"
143
+ # # ],
144
+ # # [
145
+ # # 18,
146
+ # # 54,
147
+ # # 36
148
+ # # ],
149
+ # # [
150
+ # # "Male",
151
+ # # "Male",
152
+ # # "Female"
153
+ # # ]
154
+ # # ]
155
+ #
156
+ # @example Getting a file-writable string with orient: :split
157
+ # split_instance.to_s
158
+ #
159
+ # #=>
160
+ # # {
161
+ # # "vectors": [
162
+ # # "name",
163
+ # # "age",
164
+ # # "sex"
165
+ # # ],
166
+ # # "index": [
167
+ # # "child",
168
+ # # "dad",
169
+ # # "mom"
170
+ # # ],
171
+ # # "data": [
172
+ # # [
173
+ # # "Jon Snow",
174
+ # # "Rhaegar Targaryen",
175
+ # # "Lyanna Stark"
176
+ # # ],
177
+ # # [
178
+ # # 18,
179
+ # # 54,
180
+ # # 36
181
+ # # ],
182
+ # # [
183
+ # # "Male",
184
+ # # "Male",
185
+ # # "Female"
186
+ # # ]
187
+ # # ]
188
+ # # }
189
+ #
190
+ # @example Getting a file-writable string with static nested JsonPaths
191
+ # static_jsonpath_instance.to_s
192
+ #
193
+ # #=>
194
+ # # [
195
+ # # {
196
+ # # "common": {
197
+ # # "gender": "Male",
198
+ # # "age": 18
199
+ # # },
200
+ # # "specific": {
201
+ # # "name": "Jon Snow"
202
+ # # }
203
+ # # },
204
+ # # {
205
+ # # "common": {
206
+ # # "gender": "Male",
207
+ # # "age": 54
208
+ # # },
209
+ # # "specific": {
210
+ # # "name": "Rhaegar Targaryen"
211
+ # # }
212
+ # # },
213
+ # # {
214
+ # # "common": {
215
+ # # "gender": "Female",
216
+ # # "age": 36
217
+ # # },
218
+ # # "specific": {
219
+ # # "name": "Lyanna Stark"
220
+ # # }
221
+ # # }
222
+ # # ]
223
+ #
224
+ # @example Getting a file-writable string with dynamic JsonPaths
225
+ # dynamic_jsonpath_instance.to_s
226
+ #
227
+ # #=>
228
+ # # [
229
+ # # {
230
+ # # "Jon Snow": {
231
+ # # "gender": "Male",
232
+ # # "age": 18
233
+ # # }
234
+ # # },
235
+ # # {
236
+ # # "Rhaegar Targaryen": {
237
+ # # "gender": "Male",
238
+ # # "age": 54
239
+ # # }
240
+ # # },
241
+ # # {
242
+ # # "Lyanna Stark": {
243
+ # # "gender": "Female",
244
+ # # "age": 36
245
+ # # }
246
+ # # }
247
+ # # ]
248
+ #
249
+ # @example Getting a file-writable string with orient: :index and block
250
+ # block_instance.to_s
251
+ #
252
+ # #=>
253
+ # # {
254
+ # # "child": {
255
+ # # "sex": "Male",
256
+ # # "age": 18,
257
+ # # "name": "Jon Snow"
258
+ # # },
259
+ # # "dad": {
260
+ # # "sex": "Male",
261
+ # # "age": 54,
262
+ # # "name": "Rhaegar Targaryen"
263
+ # # },
264
+ # # "mom": {
265
+ # # "sex": "Female",
266
+ # # "age": 36,
267
+ # # "name": "Lyanna Stark"
268
+ # # }
269
+ # # }
270
# Delegates unchanged to the parent class implementation of `to_s`.
def to_s
  super()
end
273
+
274
+ # Exports a JSON Exporter instance to a Ruby structure composed of Arrays and Hashes.
275
+ #
276
+ # @return [Array, Hash]
277
+ #
278
+ # @example With default orient: :records
279
+ # records_instance.to
280
+ #
281
+ # #=>
282
+ # # [
283
+ # # {
284
+ # # "sex": "Male",
285
+ # # "age": 18,
286
+ # # "name": "Jon Snow"
287
+ # # },
288
+ # # {
289
+ # # "sex": "Male",
290
+ # # "age": 54,
291
+ # # "name": "Rhaegar Targaryen"
292
+ # # },
293
+ # # {
294
+ # # "sex": "Female",
295
+ # # "age": 36,
296
+ # # "name": "Lyanna Stark"
297
+ # # }
298
+ # # ]
299
+ #
300
+ # @example With orient: :index
301
+ # index_instance.to
302
+ #
303
+ # #=>
304
+ # # [
305
+ # # {
306
+ # # "child": {
307
+ # # "sex": "Male",
308
+ # # "age": 18,
309
+ # # "name": "Jon Snow"
310
+ # # }
311
+ # # },
312
+ # # {
313
+ # # "dad": {
314
+ # # "sex": "Male",
315
+ # # "age": 54,
316
+ # # "name": "Rhaegar Targaryen"
317
+ # # }
318
+ # # },
319
+ # # {
320
+ # # "mom": {
321
+ # # "sex": "Female",
322
+ # # "age": 36,
323
+ # # "name": "Lyanna Stark"
324
+ # # }
325
+ # # }
326
+ # # ]
327
+ #
328
+ # @example With orient: :values
329
+ # values_instance.to
330
+ #
331
+ # #=>
332
+ # # [
333
+ # # [
334
+ # # "Jon Snow",
335
+ # # "Rhaegar Targaryen",
336
+ # # "Lyanna Stark"
337
+ # # ],
338
+ # # [
339
+ # # 18,
340
+ # # 54,
341
+ # # 36
342
+ # # ],
343
+ # # [
344
+ # # "Male",
345
+ # # "Male",
346
+ # # "Female"
347
+ # # ]
348
+ # # ]
349
+ #
350
+ # @example With orient: :split
351
+ # split_instance.to
352
+ #
353
+ # #=>
354
+ # # {
355
+ # # "vectors": [
356
+ # # "name",
357
+ # # "age",
358
+ # # "sex"
359
+ # # ],
360
+ # # "index": [
361
+ # # "child",
362
+ # # "dad",
363
+ # # "mom"
364
+ # # ],
365
+ # # "data": [
366
+ # # [
367
+ # # "Jon Snow",
368
+ # # "Rhaegar Targaryen",
369
+ # # "Lyanna Stark"
370
+ # # ],
371
+ # # [
372
+ # # 18,
373
+ # # 54,
374
+ # # 36
375
+ # # ],
376
+ # # [
377
+ # # "Male",
378
+ # # "Male",
379
+ # # "Female"
380
+ # # ]
381
+ # # ]
382
+ # # }
383
+ #
384
+ # @example With static nested JsonPaths
385
+ # static_jsonpath_instance.to
386
+ #
387
+ # #=>
388
+ # # [
389
+ # # {
390
+ # # "common": {
391
+ # # "gender": "Male",
392
+ # # "age": 18
393
+ # # },
394
+ # # "specific": {
395
+ # # "name": "Jon Snow"
396
+ # # }
397
+ # # },
398
+ # # {
399
+ # # "common": {
400
+ # # "gender": "Male",
401
+ # # "age": 54
402
+ # # },
403
+ # # "specific": {
404
+ # # "name": "Rhaegar Targaryen"
405
+ # # }
406
+ # # },
407
+ # # {
408
+ # # "common": {
409
+ # # "gender": "Female",
410
+ # # "age": 36
411
+ # # },
412
+ # # "specific": {
413
+ # # "name": "Lyanna Stark"
414
+ # # }
415
+ # # }
416
+ # # ]
417
+ #
418
+ # @example With dynamic JsonPaths
419
+ # dynamic_jsonpath_instance.to
420
+ #
421
+ # #=>
422
+ # # [
423
+ # # {
424
+ # # "Jon Snow": {
425
+ # # "gender": "Male",
426
+ # # "age": 18
427
+ # # }
428
+ # # },
429
+ # # {
430
+ # # "Rhaegar Targaryen": {
431
+ # # "gender": "Male",
432
+ # # "age": 54
433
+ # # }
434
+ # # },
435
+ # # {
436
+ # # "Lyanna Stark": {
437
+ # # "gender": "Female",
438
+ # # "age": 36
439
+ # # }
440
+ # # }
441
+ # # ]
442
+ #
443
+ # @example With orient: :index and block
444
+ # block_instance.to
445
+ #
446
+ # #=>
447
+ # # {
448
+ # # "child": {
449
+ # # "sex": "Male",
450
+ # # "age": 18,
451
+ # # "name": "Jon Snow"
452
+ # # },
453
+ # # "dad": {
454
+ # # "sex": "Male",
455
+ # # "age": 54,
456
+ # # "name": "Rhaegar Targaryen"
457
+ # # },
458
+ # # "mom": {
459
+ # # "sex": "Female",
460
+ # # "age": 36,
461
+ # # "name": "Lyanna Stark"
462
+ # # }
463
+ # # }
464
# Builds the Ruby structure (Arrays / Hashes) that represents the dataframe.
#
# When no JsonPaths were supplied, every vector `v` is mapped to the
# top-level path "$.v". The result of `process_json_content` is passed
# through the user block, if one was given, before being returned.
#
# @return [Array, Hash] the (optionally block-transformed) JSON content
def to
  # each_with_object yields {} for a dataframe without vectors; the previous
  # map + reduce(:merge) yielded nil there and crashed on `nil.keys` below.
  @jsonpath_hash ||= @dataframe.vectors.to_a.each_with_object({}) { |vector, paths| paths[vector] = "$.#{vector}" }
  @vectors = @jsonpath_hash.keys
  @jsonpaths = process_jsonpath
  @json_content = process_json_content
  @json_content = @block.call(@json_content) if @block

  @json_content
end
473
+
474
+ # Exports a JSON Exporter instance to a json file.
475
+ #
476
+ # @param path [String] Path of JSON file where the dataframe is to be saved
477
+ #
478
+ # @example Writing a JSON Exporter instance to a JSON file
479
+ # index_instance.write('index.json')
480
+ # split_instance.write('split.json')
481
+ # values_instance.write('values.json')
482
+ # records_instance.write('records.json')
483
+ # static_jsonpath_instance.write('static.json')
484
+ # dynamic_jsonpath_instance.write('dynamic.json')
485
+ # block_instance.write('block.json')
486
# Serialises the exporter content and writes it to a JSON file.
#
# The JSON text is generated BEFORE the file is opened, so that a failure
# while building or serialising the content does not truncate an existing
# file at `path`.
#
# @param path [String] Path of JSON file where the dataframe is to be saved
# @return [Integer] number of bytes written
def write(path)
  content = @pretty ? ::JSON.pretty_generate(to) : ::JSON.generate(to)
  File.write(path, content)
end
491
+
492
+ private
493
+
494
# True only when both objects are instances of the given class.
def both_are?(class_name, obj1, obj2)
  [obj1, obj2].all? { |candidate| candidate.is_a?(class_name) }
end
497
+
498
# Recursively merges `source` into `dest`.
#
# - a nil side yields the other side
# - two Arrays yield their union (`dest | source`)
# - two Hashes are merged key by key, mutating and returning `dest`
# - anything else yields `source`
def deep_merge(source, dest)
  if dest.nil?
    source
  elsif source.nil?
    dest
  elsif both_are?(Array, source, dest)
    dest | source
  elsif both_are?(Hash, source, dest)
    source.each_with_object(dest) do |(key, incoming), merged|
      existing = merged[key]
      merged[key] = existing ? deep_merge(incoming, existing) : incoming
    end
  else
    source
  end
end
510
+
511
# Resolves one JsonPath segment into the key used in the exported structure.
#
# Segments ending in '}' are dynamic: 'index}' resolves to the row's index
# label, any other 'name}' resolves to the value of `row[:name]`. Static
# segments are returned untouched.
#
# Dynamic keys are normalised without assuming they are Strings: Integers
# are kept as they are, integer-looking text becomes an Integer, and
# everything else becomes a Symbol via `to_s`. Calling `to_sym` / `to_i`
# directly raised NoMethodError for Integer index labels and for Symbol,
# Integer or nil row values.
#
# @param sub_path [Symbol, Integer] one segment of a processed JsonPath
# @param idx [Object] index label of the current row
# @param row [#[]] current row, addressable by vector name
# @return [Symbol, Integer] the key to use for this segment
def handle_dynamic_keys(sub_path, idx, row)
  segment = sub_path.to_s
  return sub_path unless segment.end_with?('}')

  if segment == 'index}'
    return idx if idx.is_a?(Integer)
    return idx.to_s.to_sym
  end

  value = row[segment.delete('}').to_sym]
  return value if value.is_a?(Integer)

  text = value.to_s
  text.to_i.to_s == text ? text.to_i : text.to_sym
end
520
+
521
# Walks `hash` along the given path segments and stores the row's value
# (or the index label when `jsonpath_key` is :index) at the last segment.
# Always returns the hash it was handed.
def init_hash_rec(jsonpaths, hash, jsonpath_key, row, idx)
  segment = handle_dynamic_keys(jsonpaths[0], idx, row)

  unless jsonpaths.count == 1
    # More segments remain: descend into the child stored under this segment.
    init_hash_rec(jsonpaths[1..-1], hash[segment], jsonpath_key, row, idx)
    return hash
  end

  leaf_value = jsonpath_key == :index ? idx : row[jsonpath_key]
  hash[segment] = leaf_value
  hash
end
530
+
531
# Builds one nested hash per JsonPath (paired by position with its key in
# `jsonpath_keys`) and folds them together with `deep_merge`.
# Returns nil when `jsonpaths` is empty.
def init_hash(jsonpaths, jsonpath_keys, row, idx)
  fragments = jsonpaths.each_with_index.map do |path, position|
    # Missing keys create nested hashes that inherit the same default block.
    nested = Hash.new { |store, missing| store[missing] = Hash.new(&store.default_proc) }
    init_hash_rec(path, nested, jsonpath_keys[position], row, idx)
  end

  fragments.reduce { |merged, fragment| deep_merge(merged, fragment) }
end
536
+
537
# Produces the exportable structure for the configured orient:
# :values returns the per-vector arrays, :split a Hash with :vectors, :index
# and :data, :records one nested hash per row, and any other orient one
# `{index => nested hash}` per row.
def process_json_content
  case @orient
  when :values
    @dataframe.map_vectors(&:to_a)
  when :split
    {
      vectors: @dataframe.vectors.to_a,
      index: @dataframe.index.to_a,
      data: @dataframe.map_vectors(&:to_a)
    }
  else
    @dataframe.map_rows_with_index do |row, idx|
      nested = init_hash(@jsonpaths, @vectors, row, idx)
      @orient == :records ? nested : {idx => nested}
    end
  end
end
553
+
554
# Converts every configured JsonPath expression into a list of keys:
# structural tokens are dropped, quote / dot / bracket / opening-brace
# characters are stripped, integer-looking names become Integers and all
# other names become Symbols.
def process_jsonpath
  structural_tokens = %w[$ ${ . .. ..{]

  @jsonpath_hash.values.map do |expression|
    tokens = JsonPath.new(expression).path - structural_tokens
    tokens.map do |token|
      name = token.delete("'.[]{")
      name.to_i.to_s == name ? name.to_i : name.to_sym
    end
  end
end
563
+
564
# Raises ArgumentError unless the configured orient is one of ORIENT_TYPES.
def validate_params
  return if ORIENT_TYPES.include?(@orient)

  raise ArgumentError, "Invalid orient option '#{@orient}' given."
end
567
+ end
568
+ end
569
+ end
570
+ end