red_amber 0.4.1 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +11 -5
- data/CHANGELOG.md +93 -1
- data/Gemfile +5 -6
- data/README.ja.md +252 -0
- data/README.md +30 -23
- data/benchmark/basic.yml +1 -1
- data/benchmark/group.yml +12 -5
- data/doc/CODE_OF_CONDUCT.md +1 -1
- data/docker/.env +4 -0
- data/docker/Dockerfile +66 -0
- data/docker/Gemfile +26 -0
- data/docker/Gemfile.lock +118 -0
- data/docker/docker-compose.yml +21 -0
- data/docker/example +86 -0
- data/docker/notebook/examples_of_red_amber.ipynb +8562 -0
- data/docker/notebook/red-amber.ipynb +188 -0
- data/docker/readme.md +118 -0
- data/lib/red_amber/data_frame.rb +78 -4
- data/lib/red_amber/data_frame_combinable.rb +147 -119
- data/lib/red_amber/data_frame_displayable.rb +6 -5
- data/lib/red_amber/data_frame_selectable.rb +49 -0
- data/lib/red_amber/group.rb +190 -89
- data/lib/red_amber/helper.rb +26 -0
- data/lib/red_amber/subframes.rb +166 -66
- data/lib/red_amber/vector.rb +43 -24
- data/lib/red_amber/vector_aggregation.rb +26 -0
- data/lib/red_amber/vector_binary_element_wise.rb +54 -25
- data/lib/red_amber/vector_selectable.rb +74 -23
- data/lib/red_amber/vector_string_function.rb +211 -0
- data/lib/red_amber/vector_unary_element_wise.rb +4 -0
- data/lib/red_amber/vector_updatable.rb +28 -0
- data/lib/red_amber/version.rb +1 -1
- data/lib/red_amber.rb +2 -1
- data/red_amber.gemspec +4 -4
- metadata +20 -9
@@ -221,6 +221,11 @@ module RedAmber
|
|
221
221
|
# - Same as `#join` with `type: :inner`
|
222
222
|
# - A kind of mutating join.
|
223
223
|
#
|
224
|
+
# @note the order of joined results will be preserved by default.
|
225
|
+
# This is enabled by appending index column to sort after joining but
|
226
|
+
# it will cause some performance degradation. If you don't care about
|
227
|
+
# the order of the result, set `force_order` option to `false`.
|
228
|
+
#
|
224
229
|
# @overload inner_join(other, suffix: '.1', force_order: true)
|
225
230
|
# If `join_key` is not specified, common keys in self and other are used
|
226
231
|
# (natural keys). Returns joined dataframe.
|
@@ -280,6 +285,11 @@ module RedAmber
|
|
280
285
|
# - Same as `#join` with `type: :full_outer`
|
281
286
|
# - A kind of mutating join.
|
282
287
|
#
|
288
|
+
# @note the order of joined results will be preserved by default.
|
289
|
+
# This is enabled by appending index column to sort after joining but
|
290
|
+
# it will cause some performance degradation. If you don't care about
|
291
|
+
# the order of the result, set `force_order` option to `false`.
|
292
|
+
#
|
283
293
|
# @overload full_join(other, suffix: '.1', force_order: true)
|
284
294
|
# If `join_key` is not specified, common keys in self and other are used
|
285
295
|
# (natural keys). Returns joined dataframe.
|
@@ -348,6 +358,11 @@ module RedAmber
|
|
348
358
|
# - Same as `#join` with `type: :left_outer`
|
349
359
|
# - A kind of mutating join.
|
350
360
|
#
|
361
|
+
# @note the order of joined results will be preserved by default.
|
362
|
+
# This is enabled by appending index column to sort after joining but
|
363
|
+
# it will cause some performance degradation. If you don't care about
|
364
|
+
# the order of the result, set `force_order` option to `false`.
|
365
|
+
#
|
351
366
|
# @overload left_join(other, suffix: '.1', force_order: true)
|
352
367
|
# If `join_key` is not specified, common keys in self and other are used
|
353
368
|
# (natural keys). Returns joined dataframe.
|
@@ -410,6 +425,11 @@ module RedAmber
|
|
410
425
|
# - Same as `#join` with `type: :right_outer`
|
411
426
|
# - A kind of mutating join.
|
412
427
|
#
|
428
|
+
# @note the order of joined results will be preserved by default.
|
429
|
+
# This is enabled by appending index column to sort after joining but
|
430
|
+
# it will cause some performance degradation. If you don't care about
|
431
|
+
# the order of the result, set `force_order` option to `false`.
|
432
|
+
#
|
413
433
|
# @overload right_join(other, suffix: '.1', force_order: true)
|
414
434
|
# If `join_key` is not specified, common keys in self and other are used
|
415
435
|
# (natural keys). Returns joined dataframe.
|
@@ -422,11 +442,11 @@ module RedAmber
|
|
422
442
|
# df.right_join(other)
|
423
443
|
#
|
424
444
|
# # =>
|
425
|
-
#
|
426
|
-
# <
|
427
|
-
# 0 A
|
428
|
-
# 1 B
|
429
|
-
# 2
|
445
|
+
# X1 KEY X2
|
446
|
+
# <uint8> <string> <boolean>
|
447
|
+
# 0 1 A true
|
448
|
+
# 1 2 B false
|
449
|
+
# 2 (nil) D (nil)
|
430
450
|
#
|
431
451
|
# @overload right_join(other, join_keys, suffix: '.1', force_order: true)
|
432
452
|
#
|
@@ -439,11 +459,11 @@ module RedAmber
|
|
439
459
|
# df.right_join(other, :KEY)
|
440
460
|
#
|
441
461
|
# # =>
|
442
|
-
#
|
443
|
-
# <
|
444
|
-
# 0 A
|
445
|
-
# 1 B
|
446
|
-
# 2
|
462
|
+
# X1 KEY X2
|
463
|
+
# <uint8> <string> <boolean>
|
464
|
+
# 0 1 A true
|
465
|
+
# 1 2 B false
|
466
|
+
# 2 (nil) D (nil)
|
447
467
|
#
|
448
468
|
# @overload right_join(other, join_key_pairs, suffix: '.1', force_order: true)
|
449
469
|
#
|
@@ -456,11 +476,11 @@ module RedAmber
|
|
456
476
|
# df2.right_join(other2, { left: :KEY1, right: :KEY2 })
|
457
477
|
#
|
458
478
|
# # =>
|
459
|
-
#
|
460
|
-
# <
|
461
|
-
# 0 A
|
462
|
-
# 1 B
|
463
|
-
# 2
|
479
|
+
# X1 KEY2 X2
|
480
|
+
# <uint8> <string> <boolean>
|
481
|
+
# 0 1 A true
|
482
|
+
# 1 2 B false
|
483
|
+
# 2 (nil) D (nil)
|
464
484
|
#
|
465
485
|
# @since 0.2.3
|
466
486
|
#
|
@@ -480,6 +500,11 @@ module RedAmber
|
|
480
500
|
# - Same as `#join` with `type: :left_semi`
|
481
501
|
# - A kind of filtering join.
|
482
502
|
#
|
503
|
+
# @note the order of joined results will be preserved by default.
|
504
|
+
# This is enabled by appending index column to sort after joining but
|
505
|
+
# it will cause some performance degradation. If you don't care about
|
506
|
+
# the order of the result, set `force_order` option to `false`.
|
507
|
+
#
|
483
508
|
# @overload semi_join(other, suffix: '.1', force_order: true)
|
484
509
|
# If `join_key` is not specified, common keys in self and other are used
|
485
510
|
# (natural keys). Returns joined dataframe.
|
@@ -539,6 +564,11 @@ module RedAmber
|
|
539
564
|
# - Same as `#join` with `type: :left_anti`
|
540
565
|
# - A kind of filtering join.
|
541
566
|
#
|
567
|
+
# @note the order of joined results will be preserved by default.
|
568
|
+
# This is enabled by appending index column to sort after joining but
|
569
|
+
# it will cause some performance degradation. If you don't care about
|
570
|
+
# the order of the result, set `force_order` option to `false`.
|
571
|
+
#
|
542
572
|
# @overload anti_join(other, suffix: '.1', force_order: true)
|
543
573
|
# If `join_key` is not specified, common keys in self and other are used
|
544
574
|
# (natural keys). Returns joined dataframe.
|
@@ -661,7 +691,7 @@ module RedAmber
|
|
661
691
|
raise DataFrameArgumentError, 'keys are not same with self and other'
|
662
692
|
end
|
663
693
|
|
664
|
-
join(other, keys, type: :full_outer)
|
694
|
+
join(other, keys, type: :full_outer, force_order: true)
|
665
695
|
end
|
666
696
|
|
667
697
|
# Select records appearing in self but not in other.
|
@@ -733,12 +763,12 @@ module RedAmber
|
|
733
763
|
# 1 B E
|
734
764
|
# 2 C F
|
735
765
|
|
736
|
-
# @note the order of joined results
|
737
|
-
#
|
738
|
-
#
|
739
|
-
#
|
766
|
+
# @note the order of joined results may not be preserved by default.
|
767
|
+
# If you prefer to preserve the order of the result, set `force_order` option
|
768
|
+
# to `true`. This is enabled by appending index column to sort after joining
|
769
|
+
# so it will cause some performance degradation.
|
740
770
|
#
|
741
|
-
# @overload join(other, type: :inner, suffix: '.1', force_order:
|
771
|
+
# @overload join(other, type: :inner, suffix: '.1', force_order: false)
|
742
772
|
#
|
743
773
|
# If `join_key` is not specified, common keys in self and other are used
|
744
774
|
# (natural keys). Returns joined dataframe.
|
@@ -767,7 +797,7 @@ module RedAmber
|
|
767
797
|
# 2 C 3 (nil)
|
768
798
|
# 3 D (nil) (nil)
|
769
799
|
#
|
770
|
-
# @overload join(other, join_keys, type: :inner, suffix: '.1', force_order:
|
800
|
+
# @overload join(other, join_keys, type: :inner, suffix: '.1', force_order: false)
|
771
801
|
#
|
772
802
|
# @macro join_before
|
773
803
|
# @macro join_key_in_array
|
@@ -792,7 +822,8 @@ module RedAmber
|
|
792
822
|
# 0 A 1 1
|
793
823
|
# 1 B 2 4
|
794
824
|
#
|
795
|
-
# @overload join(
|
825
|
+
# @overload join(
|
826
|
+
# other, join_key_pairs, type: :inner, suffix: '.1', force_order: false)
|
796
827
|
#
|
797
828
|
# @macro join_before
|
798
829
|
# @macro join_key_in_hash
|
@@ -828,7 +859,8 @@ module RedAmber
|
|
828
859
|
#
|
829
860
|
# @since 0.2.3
|
830
861
|
#
|
831
|
-
def join(other, join_keys = nil, type: :inner, suffix: '.1', force_order:
|
862
|
+
def join(other, join_keys = nil, type: :inner, suffix: '.1', force_order: false)
|
863
|
+
left_table = table
|
832
864
|
right_table =
|
833
865
|
case other
|
834
866
|
when DataFrame
|
@@ -839,24 +871,26 @@ module RedAmber
|
|
839
871
|
raise DataFrameArgumentError, 'other must be a DataFrame or an Arrow::Table'
|
840
872
|
end
|
841
873
|
|
842
|
-
type = type.to_sym
|
843
|
-
left_index = :__LEFT_INDEX__
|
844
|
-
right_index = :__RIGHT_INDEX__
|
845
874
|
if force_order
|
875
|
+
left_index = :__LEFT_INDEX__
|
876
|
+
right_index = :__RIGHT_INDEX__
|
846
877
|
left_table = assign(left_index) { indices }.table
|
847
878
|
other = DataFrame.create(other) if other.is_a?(Arrow::Table)
|
848
879
|
right_table = other.assign(right_index) { indices }.table
|
849
|
-
else
|
850
|
-
left_table = table
|
851
880
|
end
|
852
881
|
|
853
|
-
|
854
|
-
|
855
|
-
|
882
|
+
left_table_keys = ensure_keys(left_table.keys)
|
883
|
+
right_table_keys = ensure_keys(right_table.keys)
|
856
884
|
# natural keys (implicit common keys)
|
857
|
-
join_keys ||=
|
885
|
+
join_keys ||= left_table_keys.intersection(right_table_keys)
|
886
|
+
|
887
|
+
type = Arrow::JoinType.try_convert(type) || type
|
888
|
+
type_nick = type.nick
|
889
|
+
|
890
|
+
plan = Arrow::ExecutePlan.new
|
891
|
+
left_node = plan.build_source_node(left_table)
|
892
|
+
right_node = plan.build_source_node(right_table)
|
858
893
|
|
859
|
-
# This is not necessary if additional procedure is contributed to Red Arrow.
|
860
894
|
if join_keys.is_a?(Hash)
|
861
895
|
left_keys = ensure_keys(join_keys[:left])
|
862
896
|
right_keys = ensure_keys(join_keys[:right])
|
@@ -865,116 +899,110 @@ module RedAmber
|
|
865
899
|
right_keys = left_keys
|
866
900
|
end
|
867
901
|
|
868
|
-
|
869
|
-
|
870
|
-
|
871
|
-
|
872
|
-
|
873
|
-
|
874
|
-
|
875
|
-
|
876
|
-
|
877
|
-
|
902
|
+
context =
|
903
|
+
[type_nick, left_table_keys, right_table_keys, left_keys, right_keys, suffix]
|
904
|
+
|
905
|
+
hash_join_node_options = Arrow::HashJoinNodeOptions.new(type, left_keys, right_keys)
|
906
|
+
case type_nick
|
907
|
+
when 'inner', 'left-outer'
|
908
|
+
hash_join_node_options.left_outputs = left_table_keys
|
909
|
+
hash_join_node_options.right_outputs = right_table_keys - right_keys
|
910
|
+
when 'right-outer'
|
911
|
+
hash_join_node_options.left_outputs = left_table_keys - left_keys
|
912
|
+
hash_join_node_options.right_outputs = right_table_keys
|
878
913
|
end
|
879
914
|
|
880
|
-
|
881
|
-
|
882
|
-
|
883
|
-
|
884
|
-
|
885
|
-
|
886
|
-
|
887
|
-
|
888
|
-
)
|
889
|
-
|
890
|
-
case type
|
891
|
-
when :inner, :left_outer, :left_semi, :left_anti, :right_semi, :right_anti
|
892
|
-
dataframe =
|
893
|
-
if joined_table.keys.uniq!
|
894
|
-
DataFrame.create(rename_table(joined_table, n_keys, suffix))
|
895
|
-
else
|
896
|
-
DataFrame.create(joined_table)
|
897
|
-
end
|
915
|
+
hash_join_node =
|
916
|
+
plan.build_hash_join_node(left_node, right_node, hash_join_node_options)
|
917
|
+
merge_node = merge_keys(plan, hash_join_node, context)
|
918
|
+
rename_node = rename_keys(plan, merge_node, context)
|
919
|
+
joined_table = sink_and_start_plan(plan, rename_node)
|
920
|
+
|
921
|
+
df = DataFrame.create(joined_table)
|
922
|
+
if force_order
|
898
923
|
sorter =
|
899
|
-
case
|
900
|
-
when
|
901
|
-
[left_index, right_index]
|
902
|
-
when :left_semi, :left_anti
|
903
|
-
[left_index]
|
904
|
-
when :right_semi, :right_anti
|
924
|
+
case type_nick
|
925
|
+
when 'right-semi', 'right-anti'
|
905
926
|
[right_index]
|
906
|
-
|
907
|
-
|
908
|
-
key_index_lr =
|
909
|
-
left_keys.map { left_table.keys.index(_1) }
|
910
|
-
.zip(right_keys.map { left_table.keys.size + right_table.keys.index(_1) })
|
911
|
-
renamed_table = rename_table(joined_table, n_keys, suffix)
|
912
|
-
dropper = []
|
913
|
-
dataframe =
|
914
|
-
DataFrame.create(renamed_table).assign do |df|
|
915
|
-
key_index_lr.map do |l, r|
|
916
|
-
dropper << df.keys[r]
|
917
|
-
[df.keys[l], merge_array(df.vectors[l].data, df.vectors[r].data)]
|
918
|
-
end
|
919
|
-
end
|
920
|
-
dataframe = dataframe.drop(dropper)
|
921
|
-
sorter = [left_index, right_index]
|
922
|
-
when :right_outer
|
923
|
-
dataframe =
|
924
|
-
if joined_table.keys.uniq!
|
925
|
-
DataFrame.create(rename_table(joined_table, left_outputs.size, suffix))
|
927
|
+
when 'left-semi', 'left-anti'
|
928
|
+
[left_index]
|
926
929
|
else
|
927
|
-
|
930
|
+
[left_index, right_index]
|
928
931
|
end
|
929
|
-
|
930
|
-
sorter = [left_index, right_index]
|
931
|
-
end
|
932
|
-
|
933
|
-
if force_order
|
934
|
-
dataframe
|
935
|
-
.sort(sorter)
|
932
|
+
df.sort(sorter)
|
936
933
|
.drop(sorter)
|
937
934
|
else
|
938
|
-
|
935
|
+
df
|
939
936
|
end
|
940
937
|
end
|
941
938
|
|
942
939
|
private
|
943
940
|
|
944
|
-
# To ensure Array of
|
941
|
+
# To ensure Array of Strings
|
945
942
|
def ensure_keys(keys)
|
946
|
-
Array(keys).map(&:
|
943
|
+
Array(keys).map(&:to_s)
|
944
|
+
end
|
945
|
+
|
946
|
+
# Merge key columns and preserve as left and remove right.
|
947
|
+
def merge_keys(plan, input_node, context)
|
948
|
+
type_nick, left_table_keys, right_table_keys, left_keys, right_keys, * = context
|
949
|
+
return input_node unless type_nick == 'full-outer'
|
950
|
+
|
951
|
+
left_indices = left_keys.map { left_table_keys.index(_1) }
|
952
|
+
right_offset = left_table_keys.size
|
953
|
+
right_indices = right_keys.map { right_table_keys.index(_1) + right_offset }
|
954
|
+
expressions = []
|
955
|
+
names = []
|
956
|
+
left_table_keys.each_with_index do |key, index|
|
957
|
+
names << key
|
958
|
+
expressions <<
|
959
|
+
if (i = left_indices.index(index))
|
960
|
+
left_field = Arrow::FieldExpression.new("[#{left_indices[i]}]")
|
961
|
+
right_field = Arrow::FieldExpression.new("[#{right_indices[i]}]")
|
962
|
+
is_left_null = Arrow::CallExpression.new('is_null', [left_field])
|
963
|
+
Arrow::CallExpression.new('if_else', [is_left_null, right_field, left_field])
|
964
|
+
else
|
965
|
+
Arrow::FieldExpression.new("[#{index}]")
|
966
|
+
end
|
967
|
+
end
|
968
|
+
right_table_keys.each.with_index(right_offset) do |key, index|
|
969
|
+
unless right_indices.include?(index)
|
970
|
+
names << key
|
971
|
+
expressions << Arrow::FieldExpression.new("[#{index}]")
|
972
|
+
end
|
973
|
+
end
|
974
|
+
project_node_options = Arrow::ProjectNodeOptions.new(expressions, names)
|
975
|
+
plan.build_project_node(input_node, project_node_options)
|
947
976
|
end
|
948
977
|
|
949
|
-
|
950
|
-
|
951
|
-
|
952
|
-
|
978
|
+
def rename_keys(plan, input_node, context)
|
979
|
+
type_nick, left_table_keys, right_table_keys, *, suffix = context
|
980
|
+
names = input_node.output_schema.fields.map(&:name)
|
981
|
+
return input_node unless names.dup.uniq!
|
953
982
|
|
954
|
-
|
983
|
+
pos_rights =
|
984
|
+
if type_nick.start_with?('right')
|
985
|
+
names.size - right_table_keys.size
|
986
|
+
else
|
987
|
+
left_table_keys.size
|
988
|
+
end
|
989
|
+
rights = names[pos_rights..]
|
990
|
+
dup_keys = names.tally.select { |_, v| v > 1 }.keys
|
955
991
|
renamed_right_keys =
|
956
|
-
|
992
|
+
rights.map do |key|
|
957
993
|
if dup_keys.include?(key)
|
958
|
-
suffixed = "#{key}#{suffix}".
|
994
|
+
suffixed = "#{key}#{suffix}".to_s
|
959
995
|
# Find a key from suffixed.succ
|
960
|
-
(suffixed..).find { !
|
996
|
+
(suffixed..).find { !names.include?(_1) }
|
961
997
|
else
|
962
998
|
key
|
963
999
|
end
|
964
1000
|
end
|
965
|
-
|
966
|
-
|
967
|
-
fields =
|
968
|
-
joined_keys.map.with_index do |k, i|
|
969
|
-
Arrow::Field.new(k, joined_table[i].data_type)
|
970
|
-
end
|
971
|
-
Arrow::Table.new(Arrow::Schema.new(fields), joined_table.columns)
|
972
|
-
end
|
1001
|
+
names[pos_rights..] = renamed_right_keys
|
973
1002
|
|
974
|
-
|
975
|
-
|
976
|
-
|
977
|
-
Arrow::Function.find(:if_else).execute([t, array2, array1]).value
|
1003
|
+
expressions = names.map.with_index { |_, i| Arrow::FieldExpression.new("[#{i}]") }
|
1004
|
+
project_node_options = Arrow::ProjectNodeOptions.new(expressions, names)
|
1005
|
+
plan.build_project_node(input_node, project_node_options)
|
978
1006
|
end
|
979
1007
|
end
|
980
1008
|
end
|
@@ -269,12 +269,13 @@ module RedAmber
|
|
269
269
|
end
|
270
270
|
alias_method :glimpse, :tdr
|
271
271
|
|
272
|
-
# Shortcut for `tdr(:all)
|
272
|
+
# Shortcut for `tdr(:all)`.
|
273
273
|
#
|
274
|
+
# @param (see #tdr)
|
274
275
|
# @return (see #tdr)
|
275
276
|
#
|
276
|
-
def tdra
|
277
|
-
puts tdr_str(:all)
|
277
|
+
def tdra(tally: 5, elements: 5)
|
278
|
+
puts tdr_str(:all, tally: tally, elements: elements)
|
278
279
|
end
|
279
280
|
|
280
281
|
# rubocop:enable Layout/LineLength
|
@@ -504,9 +505,9 @@ module RedAmber
|
|
504
505
|
row.zip(formats).map do |elem, format|
|
505
506
|
non_ascii_diff = elem.ascii_only? ? 0 : elem.width - elem.size
|
506
507
|
if format.negative?
|
507
|
-
elem.ljust(-format
|
508
|
+
elem.ljust(-format - non_ascii_diff)
|
508
509
|
else
|
509
|
-
elem.rjust(format
|
510
|
+
elem.rjust(format - non_ascii_diff)
|
510
511
|
end
|
511
512
|
end
|
512
513
|
str.puts a.join(' ').rstrip
|
@@ -836,6 +836,55 @@ module RedAmber
|
|
836
836
|
tail(n_obs)
|
837
837
|
end
|
838
838
|
|
839
|
+
# Select records randomly to create a DataFrame.
|
840
|
+
# This method calls `indices.sample`.
|
841
|
+
# We can use the same arguments in `Vector#sample`.
|
842
|
+
# @note This method requires 'arrow-numo-narray' gem.
|
843
|
+
#
|
844
|
+
# @overload sample()
|
845
|
+
# Return a DataFrame with a randomly selected record.
|
846
|
+
#
|
847
|
+
# @return [DataFrame]
|
848
|
+
# a DataFrame with single record.
|
849
|
+
#
|
850
|
+
# @overload sample(n)
|
851
|
+
# Return a DataFrame with n records selected at random.
|
852
|
+
#
|
853
|
+
# @param n [Integer]
|
854
|
+
# positive number of records to select.
|
855
|
+
# If n is smaller than or equal to `size`, records are selected without repetition.
|
856
|
+
# If n is greater than `size`, records are selected repeatedly.
|
857
|
+
# @return [DataFrame]
|
858
|
+
# a DataFrame with sampled records.
|
859
|
+
#
|
860
|
+
# @overload sample(prop)
|
861
|
+
# Return a DataFrame with records by proportion `prop` at random.
|
862
|
+
#
|
863
|
+
# @param prop [Float]
|
864
|
+
# positive proportion of records to select.
|
865
|
+
# Absolute number of records to select: `prop*size` is rounded (by `half: :up`).
|
866
|
+
# If prop is smaller than or equal to 1.0, records are selected without repetition.
|
867
|
+
# If prop is greater than 1.0, some records are selected repeatedly.
|
868
|
+
# @return [DataFrame]
|
869
|
+
# a DataFrame with sampled records.
|
870
|
+
#
|
871
|
+
# @since 0.5.0
|
872
|
+
#
|
873
|
+
def sample(n_or_prop = nil)
|
874
|
+
slice { indices.sample(n_or_prop) }
|
875
|
+
end
|
876
|
+
|
877
|
+
# Returns a DataFrame with shuffled rows.
|
878
|
+
#
|
879
|
+
# @note This method requires 'arrow-numo-narray' gem.
|
880
|
+
# @note Same behavior as `DataFrame#sample(1.0)`
|
881
|
+
# @return (see #sample)
|
882
|
+
# @since 0.5.0
|
883
|
+
#
|
884
|
+
def shuffle
|
885
|
+
sample(1.0)
|
886
|
+
end
|
887
|
+
|
839
888
|
# Select records by index Array to create a DataFrame.
|
840
889
|
#
|
841
890
|
# - TODO: support for option `boundscheck: true`
|