red_amber 0.4.2 → 0.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42) hide show
  1. checksums.yaml +4 -4
  2. data/.devcontainer/Dockerfile +75 -0
  3. data/.devcontainer/devcontainer.json +38 -0
  4. data/.devcontainer/onCreateCommand.sh +22 -0
  5. data/.rubocop.yml +11 -5
  6. data/CHANGELOG.md +141 -17
  7. data/Gemfile +5 -6
  8. data/README.ja.md +271 -0
  9. data/README.md +52 -31
  10. data/Rakefile +55 -0
  11. data/benchmark/group.yml +12 -5
  12. data/doc/Dev_Containers.ja.md +290 -0
  13. data/doc/Dev_Containers.md +292 -0
  14. data/doc/qmd/examples_of_red_amber.qmd +4596 -0
  15. data/doc/qmd/red-amber.qmd +90 -0
  16. data/docker/Dockerfile +2 -2
  17. data/docker/Gemfile +8 -3
  18. data/docker/docker-compose.yml +1 -1
  19. data/docker/readme.md +5 -5
  20. data/lib/red_amber/data_frame.rb +78 -4
  21. data/lib/red_amber/data_frame_combinable.rb +147 -119
  22. data/lib/red_amber/data_frame_displayable.rb +7 -6
  23. data/lib/red_amber/data_frame_loadsave.rb +1 -1
  24. data/lib/red_amber/data_frame_selectable.rb +51 -2
  25. data/lib/red_amber/data_frame_variable_operation.rb +6 -6
  26. data/lib/red_amber/group.rb +476 -127
  27. data/lib/red_amber/helper.rb +26 -0
  28. data/lib/red_amber/subframes.rb +18 -11
  29. data/lib/red_amber/vector.rb +45 -25
  30. data/lib/red_amber/vector_aggregation.rb +26 -0
  31. data/lib/red_amber/vector_selectable.rb +124 -40
  32. data/lib/red_amber/vector_string_function.rb +279 -0
  33. data/lib/red_amber/vector_unary_element_wise.rb +4 -0
  34. data/lib/red_amber/vector_updatable.rb +28 -0
  35. data/lib/red_amber/version.rb +1 -1
  36. data/lib/red_amber.rb +2 -1
  37. data/red_amber.gemspec +3 -3
  38. metadata +19 -14
  39. data/docker/Gemfile.lock +0 -80
  40. data/docker/example +0 -74
  41. data/docker/notebook/examples_of_red_amber.ipynb +0 -8562
  42. data/docker/notebook/red-amber.ipynb +0 -188
@@ -269,12 +269,13 @@ module RedAmber
269
269
  end
270
270
  alias_method :glimpse, :tdr
271
271
 
272
- # Shortcut for `tdr(:all)``.
272
+ # Shortcut for `tdr(:all)`.
273
273
  #
274
+ # @param (see #tdr)
274
275
  # @return (see #tdr)
275
276
  #
276
- def tdra
277
- puts tdr_str(:all)
277
+ def tdra(tally: 5, elements: 5)
278
+ puts tdr_str(:all, tally: tally, elements: elements)
278
279
  end
279
280
 
280
281
  # rubocop:enable Layout/LineLength
@@ -504,9 +505,9 @@ module RedAmber
504
505
  row.zip(formats).map do |elem, format|
505
506
  non_ascii_diff = elem.ascii_only? ? 0 : elem.width - elem.size
506
507
  if format.negative?
507
- elem.ljust(-format + non_ascii_diff)
508
+ elem.ljust(-format - non_ascii_diff)
508
509
  else
509
- elem.rjust(format + non_ascii_diff)
510
+ elem.rjust(format - non_ascii_diff)
510
511
  end
511
512
  end
512
513
  str.puts a.join(' ').rstrip
@@ -539,7 +540,7 @@ module RedAmber
539
540
  in ''
540
541
  '""'
541
542
  in String
542
- element.sub(/^(\s+)$/, '"\1"') # blank spaces
543
+ element.sub(/\A(\s+)$/, '"\1"') # blank spaces
543
544
  in Float
544
545
  format('%g', element)
545
546
  in Integer
@@ -44,7 +44,7 @@ module RedAmber
44
44
  # BUFFER
45
45
  #
46
46
  # @example Load from a Buffer skipping comment line
47
- # DataFrame.load(Arrow::Buffer.new(<<~BUFFER), format: :csv, skip_lines: /^#/)
47
+ # DataFrame.load(Arrow::Buffer.new(<<~BUFFER), format: :csv, skip_lines: /\A#/)
48
48
  # # comment
49
49
  # name,age
50
50
  # Yasuko,68
@@ -39,7 +39,7 @@ module RedAmber
39
39
  # penguins[:bill_length_mm]
40
40
  #
41
41
  # # =>
42
- # #<RedAmber::Vector(:double, size=344):0x00000000000104dc>
42
+ # #<RedAmber::Vector(:double, size=344, chunked):0x0000000000008f0c>
43
43
  # [39.1, 39.5, 40.3, nil, 36.7, 39.3, 38.9, 39.2, 34.1, 42.0, 37.8, 37.8, 41.1, ... ]
44
44
  #
45
45
  # @overload [](keys)
@@ -173,7 +173,7 @@ module RedAmber
173
173
  # penguins.v(:bill_length_mm)
174
174
  #
175
175
  # # =>
176
- # #<RedAmber::Vector(:double, size=344):0x000000000000f140>
176
+ # #<RedAmber::Vector(:double, size=344, chunked):0x0000000000008f0c>
177
177
  # [39.1, 39.5, 40.3, nil, 36.7, 39.3, 38.9, 39.2, 34.1, 42.0, 37.8, 37.8, 41.1, ... ]
178
178
  #
179
179
  def v(key)
@@ -836,6 +836,55 @@ module RedAmber
836
836
  tail(n_obs)
837
837
  end
838
838
 
839
+ # Select records randomly to create a DataFrame.
840
+ # This method calls `indices.sample`.
841
+ # We can use the same arguments in `Vector#sample`.
842
+ # @note This method requires 'arrow-numo-narray' gem.
843
+ #
844
+ # @overload sample()
845
+ # Return a DataFrame with a randomly selected record.
846
+ #
847
+ # @return [DataFrame]
848
+ # a DataFrame with single record.
849
+ #
850
+ # @overload sample(n)
851
+ # Return a DataFrame with n records selected at random.
852
+ #
853
+ # @param n [Integer]
854
+ # positive number of records to select.
855
+ # If n is smaller than or equal to `size`, records are selected without repetition.
856
+ # If n is greater than `size`, records are selected repeatedly.
857
+ # @return [DataFrame]
858
+ # a DataFrame with sampled records.
859
+ #
860
+ # @overload sample(prop)
861
+ # Return a DataFrame with records by proportion `prop` at random.
862
+ #
863
+ # @param prop [Float]
864
+ # positive proportion of records to select.
865
+ # The absolute number of records to select, `prop*size`, is rounded (by `half: :up`).
866
+ # If prop is smaller than or equal to 1.0, records are selected without repetition.
867
+ # If prop is greater than 1.0, some records are selected repeatedly.
868
+ # @return [DataFrame]
869
+ # a DataFrame with sampled records.
870
+ #
871
+ # @since 0.5.0
872
+ #
873
+ def sample(n_or_prop = nil)
874
+ slice { indices.sample(n_or_prop) }
875
+ end
876
+
877
+ # Returns a DataFrame with shuffled rows.
878
+ #
879
+ # @note This method requires 'arrow-numo-narray' gem.
880
+ # @note Same behavior as `DataFrame#sample(1.0)`
881
+ # @return (see #sample)
882
+ # @since 0.5.0
883
+ #
884
+ def shuffle
885
+ sample(1.0)
886
+ end
887
+
839
888
  # Select records by index Array to create a DataFrame.
840
889
  #
841
890
  # - TODO: support for option `boundscheck: true`
@@ -44,7 +44,7 @@ module RedAmber
44
44
  # languages[:Language]
45
45
  #
46
46
  # # =>
47
- # #<RedAmber::Vector(:string, size=4):0x000000000010359c>
47
+ # #<RedAmber::Vector(:string, size=4, chunked):0x000000000010359c>
48
48
  # ["Ruby", "Python", "R", "Rust"]
49
49
  #
50
50
  # @overload pick(booleans)
@@ -512,8 +512,8 @@ module RedAmber
512
512
  # 1 Rui 49 78 (nil)
513
513
  # 2 Hinata 28 57 Momotaro
514
514
  #
515
- def assign(*assigner, &block)
516
- assign_update(*assigner, append_to_left: false, &block)
515
+ def assign(...)
516
+ assign_update(false, ...)
517
517
  end
518
518
 
519
519
  # Assign new or updated variables (columns) and create an updated DataFrame.
@@ -583,13 +583,13 @@ module RedAmber
583
583
  # @return [DataFrame]
584
584
  # assigned DataFrame.
585
585
  #
586
- def assign_left(*assigner, &block)
587
- assign_update(*assigner, append_to_left: true, &block)
586
+ def assign_left(...)
587
+ assign_update(true, ...)
588
588
  end
589
589
 
590
590
  private
591
591
 
592
- def assign_update(*assigner, append_to_left: false, &block)
592
+ def assign_update(append_to_left, *assigner, &block)
593
593
  if block
594
594
  assigner_from_block = instance_eval(&block)
595
595
  assigner =