red_amber 0.4.2 → 0.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.devcontainer/Dockerfile +75 -0
- data/.devcontainer/devcontainer.json +38 -0
- data/.devcontainer/onCreateCommand.sh +22 -0
- data/.rubocop.yml +11 -5
- data/CHANGELOG.md +141 -17
- data/Gemfile +5 -6
- data/README.ja.md +271 -0
- data/README.md +52 -31
- data/Rakefile +55 -0
- data/benchmark/group.yml +12 -5
- data/doc/Dev_Containers.ja.md +290 -0
- data/doc/Dev_Containers.md +292 -0
- data/doc/qmd/examples_of_red_amber.qmd +4596 -0
- data/doc/qmd/red-amber.qmd +90 -0
- data/docker/Dockerfile +2 -2
- data/docker/Gemfile +8 -3
- data/docker/docker-compose.yml +1 -1
- data/docker/readme.md +5 -5
- data/lib/red_amber/data_frame.rb +78 -4
- data/lib/red_amber/data_frame_combinable.rb +147 -119
- data/lib/red_amber/data_frame_displayable.rb +7 -6
- data/lib/red_amber/data_frame_loadsave.rb +1 -1
- data/lib/red_amber/data_frame_selectable.rb +51 -2
- data/lib/red_amber/data_frame_variable_operation.rb +6 -6
- data/lib/red_amber/group.rb +476 -127
- data/lib/red_amber/helper.rb +26 -0
- data/lib/red_amber/subframes.rb +18 -11
- data/lib/red_amber/vector.rb +45 -25
- data/lib/red_amber/vector_aggregation.rb +26 -0
- data/lib/red_amber/vector_selectable.rb +124 -40
- data/lib/red_amber/vector_string_function.rb +279 -0
- data/lib/red_amber/vector_unary_element_wise.rb +4 -0
- data/lib/red_amber/vector_updatable.rb +28 -0
- data/lib/red_amber/version.rb +1 -1
- data/lib/red_amber.rb +2 -1
- data/red_amber.gemspec +3 -3
- metadata +19 -14
- data/docker/Gemfile.lock +0 -80
- data/docker/example +0 -74
- data/docker/notebook/examples_of_red_amber.ipynb +0 -8562
- data/docker/notebook/red-amber.ipynb +0 -188
@@ -269,12 +269,13 @@ module RedAmber
|
|
269
269
|
end
|
270
270
|
alias_method :glimpse, :tdr
|
271
271
|
|
272
|
-
# Shortcut for `tdr(:all)
|
272
|
+
# Shortcut for `tdr(:all)`.
|
273
273
|
#
|
274
|
+
# @param (see #tdr)
|
274
275
|
# @return (see #tdr)
|
275
276
|
#
|
276
|
-
def tdra
|
277
|
-
puts tdr_str(:all)
|
277
|
+
def tdra(tally: 5, elements: 5)
|
278
|
+
puts tdr_str(:all, tally: tally, elements: elements)
|
278
279
|
end
|
279
280
|
|
280
281
|
# rubocop:enable Layout/LineLength
|
@@ -504,9 +505,9 @@ module RedAmber
|
|
504
505
|
row.zip(formats).map do |elem, format|
|
505
506
|
non_ascii_diff = elem.ascii_only? ? 0 : elem.width - elem.size
|
506
507
|
if format.negative?
|
507
|
-
elem.ljust(-format
|
508
|
+
elem.ljust(-format - non_ascii_diff)
|
508
509
|
else
|
509
|
-
elem.rjust(format
|
510
|
+
elem.rjust(format - non_ascii_diff)
|
510
511
|
end
|
511
512
|
end
|
512
513
|
str.puts a.join(' ').rstrip
|
@@ -539,7 +540,7 @@ module RedAmber
|
|
539
540
|
in ''
|
540
541
|
'""'
|
541
542
|
in String
|
542
|
-
element.sub(
|
543
|
+
element.sub(/\A(\s+)$/, '"\1"') # blank spaces
|
543
544
|
in Float
|
544
545
|
format('%g', element)
|
545
546
|
in Integer
|
@@ -44,7 +44,7 @@ module RedAmber
|
|
44
44
|
# BUFFER
|
45
45
|
#
|
46
46
|
# @example Load from a Buffer skipping comment line
|
47
|
-
# DataFrame.load(Arrow::Buffer.new(<<~BUFFER), format: :csv, skip_lines:
|
47
|
+
# DataFrame.load(Arrow::Buffer.new(<<~BUFFER), format: :csv, skip_lines: /\A#/)
|
48
48
|
# # comment
|
49
49
|
# name,age
|
50
50
|
# Yasuko,68
|
@@ -39,7 +39,7 @@ module RedAmber
|
|
39
39
|
# penguins[:bill_length_mm]
|
40
40
|
#
|
41
41
|
# # =>
|
42
|
-
# #<RedAmber::Vector(:double, size=344):
|
42
|
+
# #<RedAmber::Vector(:double, size=344, chunked):0x0000000000008f0c>
|
43
43
|
# [39.1, 39.5, 40.3, nil, 36.7, 39.3, 38.9, 39.2, 34.1, 42.0, 37.8, 37.8, 41.1, ... ]
|
44
44
|
#
|
45
45
|
# @overload [](keys)
|
@@ -173,7 +173,7 @@ module RedAmber
|
|
173
173
|
# penguins.v(:bill_length_mm)
|
174
174
|
#
|
175
175
|
# # =>
|
176
|
-
# #<RedAmber::Vector(:double, size=344):
|
176
|
+
# #<RedAmber::Vector(:double, size=344, chunked):0x0000000000008f0c>
|
177
177
|
# [39.1, 39.5, 40.3, nil, 36.7, 39.3, 38.9, 39.2, 34.1, 42.0, 37.8, 37.8, 41.1, ... ]
|
178
178
|
#
|
179
179
|
def v(key)
|
@@ -836,6 +836,55 @@ module RedAmber
|
|
836
836
|
tail(n_obs)
|
837
837
|
end
|
838
838
|
|
839
|
+
# Select records randomly to create a DataFrame.
|
840
|
+
# This method calls `indices.sample`.
|
841
|
+
# We can use the same arguments in `Vector#sample`.
|
842
|
+
# @note This method requires 'arrow-numo-narray' gem.
|
843
|
+
#
|
844
|
+
# @overload sample()
|
845
|
+
# Return a DataFrame with a randomly selected record.
|
846
|
+
#
|
847
|
+
# @return [DataFrame]
|
848
|
+
# a DataFrame with a single record.
|
849
|
+
#
|
850
|
+
# @overload sample(n)
|
851
|
+
# Return a DataFrame with n records selected at random.
|
852
|
+
#
|
853
|
+
# @param n [Integer]
|
854
|
+
# positive number of records to select.
|
855
|
+
# If n is less than or equal to `size`, records are selected without repetition.
|
856
|
+
# If n is greater than `size`, records are selected repeatedly.
|
857
|
+
# @return [DataFrame]
|
858
|
+
# a DataFrame with sampled records.
|
859
|
+
#
|
860
|
+
# @overload sample(prop)
|
861
|
+
# Return a DataFrame with records by proportion `prop` at random.
|
862
|
+
#
|
863
|
+
# @param prop [Float]
|
864
|
+
# positive proportion of records to select.
|
865
|
+
# The absolute number of records to select is `prop * size`, rounded (by `half: :up`).
|
866
|
+
# If prop is less than or equal to 1.0, records are selected without repetition.
|
867
|
+
# If prop is greater than 1.0, some records are selected repeatedly.
|
868
|
+
# @return [DataFrame]
|
869
|
+
# a DataFrame with sampled records.
|
870
|
+
#
|
871
|
+
# @since 0.5.0
|
872
|
+
#
|
873
|
+
def sample(n_or_prop = nil)
|
874
|
+
slice { indices.sample(n_or_prop) }
|
875
|
+
end
|
876
|
+
|
877
|
+
# Returns a DataFrame with shuffled rows.
|
878
|
+
#
|
879
|
+
# @note This method requires 'arrow-numo-narray' gem.
|
880
|
+
# @note Same behavior as `DataFrame#sample(1.0)`
|
881
|
+
# @return (see #sample)
|
882
|
+
# @since 0.5.0
|
883
|
+
#
|
884
|
+
def shuffle
|
885
|
+
sample(1.0)
|
886
|
+
end
|
887
|
+
|
839
888
|
# Select records by index Array to create a DataFrame.
|
840
889
|
#
|
841
890
|
# - TODO: support for option `boundscheck: true`
|
@@ -44,7 +44,7 @@ module RedAmber
|
|
44
44
|
# languages[:Language]
|
45
45
|
#
|
46
46
|
# # =>
|
47
|
-
# #<RedAmber::Vector(:string, size=4):0x000000000010359c>
|
47
|
+
# #<RedAmber::Vector(:string, size=4, chunked):0x000000000010359c>
|
48
48
|
# ["Ruby", "Python", "R", "Rust"]
|
49
49
|
#
|
50
50
|
# @overload pick(booleans)
|
@@ -512,8 +512,8 @@ module RedAmber
|
|
512
512
|
# 1 Rui 49 78 (nil)
|
513
513
|
# 2 Hinata 28 57 Momotaro
|
514
514
|
#
|
515
|
-
def assign(
|
516
|
-
assign_update(
|
515
|
+
def assign(...)
|
516
|
+
assign_update(false, ...)
|
517
517
|
end
|
518
518
|
|
519
519
|
# Assign new or updated variables (columns) and create an updated DataFrame.
|
@@ -583,13 +583,13 @@ module RedAmber
|
|
583
583
|
# @return [DataFrame]
|
584
584
|
# assigned DataFrame.
|
585
585
|
#
|
586
|
-
def assign_left(
|
587
|
-
assign_update(
|
586
|
+
def assign_left(...)
|
587
|
+
assign_update(true, ...)
|
588
588
|
end
|
589
589
|
|
590
590
|
private
|
591
591
|
|
592
|
-
def assign_update(*assigner,
|
592
|
+
def assign_update(append_to_left, *assigner, &block)
|
593
593
|
if block
|
594
594
|
assigner_from_block = instance_eval(&block)
|
595
595
|
assigner =
|