red_amber 0.4.2 → 0.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42) hide show
  1. checksums.yaml +4 -4
  2. data/.devcontainer/Dockerfile +75 -0
  3. data/.devcontainer/devcontainer.json +38 -0
  4. data/.devcontainer/onCreateCommand.sh +22 -0
  5. data/.rubocop.yml +11 -5
  6. data/CHANGELOG.md +141 -17
  7. data/Gemfile +5 -6
  8. data/README.ja.md +271 -0
  9. data/README.md +52 -31
  10. data/Rakefile +55 -0
  11. data/benchmark/group.yml +12 -5
  12. data/doc/Dev_Containers.ja.md +290 -0
  13. data/doc/Dev_Containers.md +292 -0
  14. data/doc/qmd/examples_of_red_amber.qmd +4596 -0
  15. data/doc/qmd/red-amber.qmd +90 -0
  16. data/docker/Dockerfile +2 -2
  17. data/docker/Gemfile +8 -3
  18. data/docker/docker-compose.yml +1 -1
  19. data/docker/readme.md +5 -5
  20. data/lib/red_amber/data_frame.rb +78 -4
  21. data/lib/red_amber/data_frame_combinable.rb +147 -119
  22. data/lib/red_amber/data_frame_displayable.rb +7 -6
  23. data/lib/red_amber/data_frame_loadsave.rb +1 -1
  24. data/lib/red_amber/data_frame_selectable.rb +51 -2
  25. data/lib/red_amber/data_frame_variable_operation.rb +6 -6
  26. data/lib/red_amber/group.rb +476 -127
  27. data/lib/red_amber/helper.rb +26 -0
  28. data/lib/red_amber/subframes.rb +18 -11
  29. data/lib/red_amber/vector.rb +45 -25
  30. data/lib/red_amber/vector_aggregation.rb +26 -0
  31. data/lib/red_amber/vector_selectable.rb +124 -40
  32. data/lib/red_amber/vector_string_function.rb +279 -0
  33. data/lib/red_amber/vector_unary_element_wise.rb +4 -0
  34. data/lib/red_amber/vector_updatable.rb +28 -0
  35. data/lib/red_amber/version.rb +1 -1
  36. data/lib/red_amber.rb +2 -1
  37. data/red_amber.gemspec +3 -3
  38. metadata +19 -14
  39. data/docker/Gemfile.lock +0 -80
  40. data/docker/example +0 -74
  41. data/docker/notebook/examples_of_red_amber.ipynb +0 -8562
  42. data/docker/notebook/red-amber.ipynb +0 -188
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 498f580bc6fc79e36b199cc3ec62c38638e4df903e956a7a78947d9091748d0c
4
- data.tar.gz: 24c3f25ff47b6bba0af26d1a1c77a80a56c2115e0bd76d26ba3f769f71d3557b
3
+ metadata.gz: b921353dbfcaf634a2e026f541caaf914125482c956fa05886f6a542f6ac2e35
4
+ data.tar.gz: 737fae720227e8e3ef36c2c3142bdb7096b051fd51363a2978079e766081d320
5
5
  SHA512:
6
- metadata.gz: '085caa83703f4b9be0a3baae3318f0bcb606bbf264347ad3db431b29fb59c87e05b142ab1fd7e8a0af0a3d64b0b81055f1e85d82414f773d01456cf5a386df25'
7
- data.tar.gz: 901e7eda6560eb2a9ab378d672b39d3ba08ae47012d8465ce541a465d744ce45d93a451ef2bff8fdf838fc1040b12facc58c87d95c6f6bedb89c5c138b95005c
6
+ metadata.gz: 8f45b7c45725b2da1d9459edaa3ae59a2355e771055c281eb612356703b727315b64ed6963ce02493e6c8ef94194e848fc19b1cf4bbd0620b44ba2e24c1953b0
7
+ data.tar.gz: ca9f134d488168f0d2396f55b118fa7db78d2f6969286896ba2139d891a8a09f8a7fde69141c2c226212748c16f93ece4737f65465f9ea0616e500177806c314
@@ -0,0 +1,75 @@
1
+ # [Choice] debian-11, debian-10, ubuntu-22.04, ubuntu-20.04, ubuntu-18.04
2
+ ARG VARIANT=ubuntu-22.04
3
+
4
+ FROM mcr.microsoft.com/devcontainers/base:${VARIANT}
5
+
6
+ # Set env for tracking that we're running in a devcontainer
7
+ ENV DEVCONTAINER=true
8
+
9
+ RUN set -e; \
10
+ apt-get update; \
11
+ apt-get install -y \
12
+ # To build Ruby
13
+ autoconf \
14
+ bison \
15
+ rustc \
16
+ libssl-dev \
17
+ libyaml-dev \
18
+ libreadline6-dev \
19
+ zlib1g-dev \
20
+ libgmp-dev \
21
+ libncurses5-dev \
22
+ libffi-dev \
23
+ libgdbm6 \
24
+ libgdbm-dev \
25
+ libdb-dev \
26
+ uuid-dev \
27
+ # To install IRuby
28
+ libczmq-dev \
29
+ libzmq3-dev
30
+
31
+ # Install Apache Arrow
32
+ ARG APACHE_ARROW_VERSION=12.0.1-1
33
+ ARG arrow_deb_tmp=/tmp/apache-arrow-apt-source-latest.deb
34
+ ARG arrow_apt_source=https://apache.jfrog.io/artifactory/arrow/ubuntu/pool/jammy/main/a/apache-arrow-apt-source/apache-arrow-apt-source_${APACHE_ARROW_VERSION}_all.deb
35
+ RUN set -e; \
36
+ apt-get update; \
37
+ curl -sfSL -o ${arrow_deb_tmp} ${arrow_apt_source}; \
38
+ apt-get install -y --no-install-recommends ${arrow_deb_tmp}; \
39
+ rm -f ${arrow_deb_tmp}; \
40
+ apt-get update; \
41
+ apt-get install -y \
42
+ libarrow-dev \
43
+ libarrow-glib-dev \
44
+ libarrow-dataset-dev \
45
+ libarrow-flight-dev \
46
+ libparquet-dev \
47
+ libparquet-glib-dev \
48
+ libgandiva-dev \
49
+ libgandiva-glib-dev
50
+
51
+ # This Dockerfile adds a non-root user with sudo access. Use the "remoteUser"
52
+ ARG USERNAME=vscode
53
+ ARG USER_UID=1000
54
+ ARG USER_GID=$USER_UID
55
+
56
+ USER $USERNAME
57
+
58
+ # Install rbenv
59
+ ARG RBENV_RUBY=3.2.2
60
+ RUN set -e; \
61
+ git clone https://github.com/rbenv/rbenv.git $HOME/.rbenv; \
62
+ echo 'eval "$($HOME/.rbenv/bin/rbenv init -)"' >> $HOME/.profile; \
63
+ echo 'eval "$($HOME/.rbenv/bin/rbenv init -)"' >> $HOME/.bashrc; \
64
+ git clone https://github.com/rbenv/ruby-build.git $HOME/.rbenv/plugins/ruby-build
65
+
66
+ # Install Ruby
67
+ # Append `RUBY_CONFIGURE_OPTS=--disable-install-doc ` before rbenv to disable documents
68
+ RUN set -e; \
69
+ $HOME/.rbenv/bin/rbenv install --verbose $RBENV_RUBY; \
70
+ $HOME/.rbenv/bin/rbenv global $RBENV_RUBY
71
+
72
+ # Install IRuby
73
+ RUN set -e; \
74
+ $HOME/.rbenv/bin/rbenv exec gem install iruby; \
75
+ $HOME/.rbenv/bin/rbenv exec iruby register --force
@@ -0,0 +1,38 @@
1
+ {
2
+ "name": "RedAmber",
3
+ "build": {
4
+ "dockerfile": "Dockerfile",
5
+ "cacheFrom": "ghcr.io/red-data-tools/red-amber"
6
+ },
7
+ "features": {
8
+ "ghcr.io/devcontainers/features/python:1": {
9
+ "installTools": true,
10
+ "installJupyterlab": true,
11
+ "version": "3.11"
12
+ },
13
+ // We don't use Ruby feature here
14
+ // "ghcr.io/devcontainers/features/ruby:1": {},
15
+ "ghcr.io/rocker-org/devcontainer-features/quarto-cli:1": {
16
+ "installTinyTex": true,
17
+ "version": "latest"
18
+ },
19
+ "ghcr.io/devcontainers/features/github-cli:1": {}
20
+ },
21
+ // VS Code extensions for Ruby
22
+ "customizations": {
23
+ "vscode": {
24
+ "extensions": [
25
+ "rebornix.Ruby",
26
+ "shopify.ruby-lsp"
27
+ ]
28
+ }
29
+ },
30
+ // Use init process to deal with zombie process
31
+ "init": true,
32
+ // set TZ from local machine's environment defaulting to 'UTC' if not supplied.
33
+ "containerEnv": {
34
+ "RUBYLIB": "/workspaces/red_amber/lib",
35
+ "TZ": "${localEnv:TZ:UTC}"
36
+ },
37
+ "onCreateCommand": ".devcontainer/onCreateCommand.sh"
38
+ }
@@ -0,0 +1,22 @@
1
+ #!/usr/bin/env bash
2
+ set -e
3
+
4
+ # Install language and set timezone
5
+ # You should change here if you use another
6
+ sudo apt-get update
7
+ sudo apt-get install -y language-pack-ja
8
+
9
+ echo 'export LANG=ja_JP.UTF-8' >> $HOME/.bashrc
10
+ echo 'export LANG=ja_JP.UTF-8' >> $HOME/.profile
11
+ echo 'export TZ=Asia/Tokyo' >> $HOME/.bashrc
12
+ echo 'export TZ=Asia/Tokyo' >> $HOME/.profile
13
+
14
+ # Install HaranoAjiFonts
15
+ mkdir -p $HOME/.fonts
16
+ git clone https://github.com/trueroad/HaranoAjiFonts.git $HOME/.fonts/HaranoAjiFonts
17
+
18
+ # Install gems
19
+ bundle install
20
+
21
+ # Create Jupyter Notebooks
22
+ rake quarto:convert
data/.rubocop.yml CHANGED
@@ -52,7 +52,7 @@ Lint/BinaryOperatorWithIdenticalOperands:
52
52
 
53
53
  Lint/Debugger:
54
54
  Exclude:
55
- - 'docker/example'
55
+ - 'bin/example'
56
56
 
57
57
  # Need for test with empty block
58
58
  # Offense count: 1
@@ -76,7 +76,8 @@ Metrics/AbcSize:
76
76
  Max: 30
77
77
  CountRepeatedAttributes: false
78
78
  AllowedMethods: [
79
- 'join', # 51.87
79
+ 'join_merge_keys', # 54.18
80
+ 'join', # 53.1
80
81
  'dataframe_info', # 46.5
81
82
  'format_table', # 84.62
82
83
  'to_long', # 33.66
@@ -87,6 +88,9 @@ Metrics/AbcSize:
87
88
  '[]', # 33.76
88
89
  'split', # 37.35
89
90
  'aggregate', # 38.13
91
+ 'filters', # 33.91
92
+ 'merge_keys', # 32.17
93
+ 'rename_keys', # 31.64
90
94
  ]
91
95
 
92
96
  # Max: 25
@@ -139,10 +143,12 @@ Metrics/MethodLength:
139
143
  Max: 30
140
144
  AllowedMethods: [
141
145
  'join', # 47
142
- 'dataframe_info', # 33
146
+ 'join_merge_keys', # 41
143
147
  'format_table', # 53
144
148
  'slice_by', # 38
145
149
  'assign_update', # 35
150
+ 'summarize', # 35
151
+ 'dataframe_info', # 33
146
152
  'drop', # 32
147
153
  'aggregate', # 31
148
154
  ]
@@ -219,7 +225,7 @@ Naming/PredicateName:
219
225
  Rubycw/Rubycw:
220
226
  Exclude:
221
227
  - 'test/**/*'
222
- - 'docker/example'
228
+ - 'bin/example'
223
229
 
224
230
  # Offense count: 16
225
231
  # This cop supports safe autocorrection (--autocorrect).
@@ -236,7 +242,7 @@ Style/SlicingWithRange:
236
242
 
237
243
  Style/MixinUsage:
238
244
  Exclude:
239
- - 'docker/example'
245
+ - 'bin/example'
240
246
 
241
247
  # Necessary to Vector < 0 element-wise comparison
242
248
  # Offense count: 5
data/CHANGELOG.md CHANGED
@@ -1,10 +1,134 @@
1
+ ## [0.5.1] - 2023-08-18
2
+
3
+ Docker environment is replaced by Dev Container,
4
+ and Jupyter Notebooks will be created from qmd files.
5
+
6
+ - Breaking change
7
+
8
+ - Bug fixes
9
+ - Fix timestamp test to set TZ locally (#249)
10
+ - Fix regexp for beginning of String (#251)
11
+ - Fix loading bin/Gemfile locally in bin/jupyter script (#261)
12
+
13
+ - New features and improvements
14
+ - Support sort and null_placement options in Vector#rank (#265)
15
+ - Add Vector#find_substring method (#270)
16
+ - Add Group#one method (#274)
17
+ - Add Group#all and #any method (#274)
18
+ - Add Group#median method (#274)
19
+ - Add Group#count_uniq method (#274)
20
+ - Introduce Dev Container environment
21
+ - Introduce Devcontainer environment (#253)
22
+ - Change lifecycle script from postCreate to onCreate (#253)
23
+ - Move example to bin (#253)
24
+ - Fix Python and Ruby versions in Dev Container (#254)
25
+ - Add locale and timezone settings (#256)
26
+ - Add quarto from devcontainer feature (#259)
27
+ - Install HaranoAjiFonts as default Tex font (#259)
28
+
29
+ - Refactoring
30
+ - Rename boolean methods in VectorStringFunction (#263)
31
+ - Refine Vector#inspect to show whether chunked or not (#267)
32
+ - Add an alias Group#count_all for #group_count (#274)
33
+
34
+ - Improve in tests/CI
35
+ - Create rake commands for Notebook convert/test (#269)
36
+ - Fix rubocop warning of forwarding arguments in assign_update (#269)
37
+ - Use rake to start example script (#269)
38
+ - Add test in Vector#rank to cover illegal rank option error (#271)
39
+ - Add bundle install to Rakefile (#276)
40
+ - Use Dockerfile to create dev container (#276)
41
+ - Save image to ghcr in ci (#276)
42
+
43
+ - Documentation and Example
44
+ - YARD
45
+ - Update Docker Environment (#245)
46
+ - Refine jupyter notebook environment (#253)
47
+ - Refine yard in Group aggregations (#274)
48
+ - Fix yard of Vector#rank (#269)
49
+ - Fix yard of Group (#269)
50
+ - Notebook
51
+ - Start source management for jupyter notebook by qmd (#259)
52
+ - Don't create ipynb if it exists (#261)
53
+ - Add Group methods (125 in total) (#269)
54
+ - Add ArrowFunction (126 in total) (#269)
55
+ - Add DataFrame#auto_cast (127 in total) (#269)
56
+ - Update required version in examples notebook (#269)
57
+ - Update examples_of_red_amber (#269)
58
+ - Update red-amber.qmd (#269)
59
+
60
+ - GitHub site
61
+ - Fix broken link in README/README.ja by Viktorius Suwandi (#262)
62
+ - Change description in gemspec (#254)
63
+ - Add documents for Dev Container (#254)
64
+
65
+ - Thanks
66
+ - Viktorius Suwandi
67
+
68
+ ## [0.5.0] - 2023-05-24
69
+
70
+ - Breaking change
71
+ - Use non keyword argument in #sub_by_value (#219)
72
+ - Upgrade dependency to Arrow 12.0.0 (#238)
73
+ - right_join will output columns as same order as Red Arrow.
74
+ - DataFrame#join will not force ordering of original column by default
75
+ - Join with type, such as full_join, sort after join by default
76
+
77
+ - Bug fixes
78
+ - Use truncate in Vector#sample(float) (#229)
79
+ - Support options in DataFrame#tdra (#231)
80
+ - Fix printing table with non-ascii strings (#233)
81
+ - Fix join for Arrow 12.0.0
82
+
83
+ - New features and improvements
84
+ - Add a singleton method Vector.[] (#218)
85
+ - Add an alias #sub_group (#219)
86
+ - Accept Group#summarize{Hash} to rename aggregated columns (#219)
87
+ - Add Group#group_frame (#219)
88
+ - Add Vector#cast (#224)
89
+ - Add Vector#fill_nil(value) (#226)
90
+ - Add Vector#one (#227)
91
+ - Add Vector#mode (#228)
92
+ - Add DataFrame#propagate (#235)
93
+ - Add DataFrame#sample (#237)
94
+ - Add DataFrame#shuffle (#237)
95
+ - Support RankOptions in Vector#rank (#239)
96
+ - Introduce MatchSubstringOptions family in Vector (#241)
97
+ - Introduce Vector#match_substring?
98
+ - Add Vector#end_with?, #start_with? method
99
+ - Add Vector#match_like?
100
+ - Add Vector#count_substring method
101
+
102
+ - Refactoring
103
+ - Refine Group and SubFrames function (#219)
104
+ - Refine Group#group_count
105
+ - Use Acero in Group#filters
106
+ - Refine Group#filters, not using Acero
107
+ - Refine Group#summarize(array)
108
+ - Use Acero for renaming columns in join (#238)
109
+ - Use index kernel with IndexOptions introduced in 12.0.0 (#240)
110
+
111
+ - Improve in tests/CI
112
+ - Use Fedora 39 Rawhide in CI (#238)
113
+
114
+ - Documentation and Example
115
+ - Add missing yard documents for SubFrames::Selectors (#219)
116
+ - Update docker/example (#219)
117
+ - Update Gemfile in docker (#219)
118
+ - Add README.ja.md (#242)
119
+
120
+ - GitHub site
121
+ - Update link of Red Data Tools Chat to matrix (#242)
122
+
123
+ - Thanks
124
+
1
125
  ## [0.4.2] - 2023-04-02
2
126
 
3
127
  - Breaking change
4
128
 
5
129
  - Bug fixes
6
130
  - Fix Vector#modulo, #fdiv, #remainder (#203)
7
-
131
+
8
132
  - New features and improvements
9
133
  - Update SubFrames#take to return SubFrames (#212)
10
134
 
@@ -49,7 +173,7 @@
49
173
  - Fix Vector#rank when data is ChunkedArray (#198)
50
174
  - Fix Vector element-wise functions with nil as scalar (#198)
51
175
  - Support :force_order for all methods of join family (#199)
52
- - Supports :force_order option to force sorting after join for all #join familiy.
176
+ - Supports :force_order option to force sorting after join for all #join family.
53
177
  - This will be valuable in some cases such as large dataframes.
54
178
  - Ensure baseframe's schema for SubFrames (#200)
55
179
 
@@ -451,11 +575,11 @@
451
575
  - Move binder support to heronshoes/docker-stacks repository.
452
576
  - Update README notebook on binder.
453
577
  - Add examples_of_RedAmber notebook on binder.
454
-
578
+
455
579
  - Start to use discussions.
456
580
 
457
581
  - Thanks
458
-
582
+
459
583
  - Kenta Murata
460
584
 
461
585
  ## [0.2.1] - 2022-09-07
@@ -504,7 +628,7 @@
504
628
  - Update Jupyter Notebook `71 examples of RedAmber`
505
629
 
506
630
  - Thanks
507
-
631
+
508
632
  - Kenta Murata
509
633
 
510
634
  ## [0.2.0] - 2022-08-15
@@ -519,7 +643,7 @@
519
643
  - Remove optional `require` for rover (#55)
520
644
  Fix DataFrame.new for argument with Rover::DataFrame.
521
645
  - Fix occasional failure in CI (#59)
522
- Sometimes the CI test fails. I added -dev dependency
646
+ Sometimes the CI test fails. I added -dev dependency
523
647
  in Arrow install by apt, not doing in bundler.
524
648
 
525
649
  - Fix calling :take in V#[] (#56)
@@ -537,7 +661,7 @@
537
661
  - Upgrade to Arrow 9.0.0 (#59)
538
662
  - Add Vector#quantile method (#59)
539
663
  Arrow::QuantileOptions has supported in Arrow GLib 9.0.0 (ARROW-16623, Thanks!)
540
-
664
+
541
665
  - Add Vector#quantiles (#62)
542
666
 
543
667
  - Add DataFrame#each_row (#56)
@@ -548,7 +672,7 @@
548
672
  - Refine DataFrame.new to use pattern match
549
673
  - Use pattern match in DataFrame#assign
550
674
  - Use pattern match in DataFrame#rename
551
-
675
+
552
676
  - Accept Array for renamer/assigner in #rename/#assign (#61)
553
677
  - Accept assigner by Arrays in DataFrame#assign
554
678
  - Accept renamer pairs by Arrays in DataFrame#rename
@@ -563,15 +687,15 @@
563
687
  - Introduce DataFrame#to_wide method
564
688
 
565
689
  - Others
566
-
690
+
567
691
  - Add alias sort_index for array_sort_indices (#59)
568
692
  - Enable :width option in DataFrame#to_s (#62)
569
693
  - Add options to DataFrame#format_table (#62)
570
694
 
571
695
  - Update Documents
572
-
696
+
573
697
  - Add Yard doc for some methods
574
-
698
+
575
699
  - Update Jupyter notebook '61 Examples of Red Amber' (#65)
576
700
 
577
701
  ## [0.1.8] - 2022-08-04 (experimental)
@@ -634,7 +758,7 @@
634
758
  - Show nils.
635
759
  - Show data types.
636
760
  - Refine documents to use new formatter output
637
-
761
+
638
762
  - Simplify options of Vector functions (#46)
639
763
  Vector functions with options use optional argument opt in previous code.
640
764
 
@@ -646,7 +770,7 @@
646
770
  - Add methods to Group
647
771
 
648
772
  - Move parquet and rover to development dependency (#49)
649
-
773
+
650
774
  - Refine text in `DataFrame#to_iruby` (#40)
651
775
 
652
776
  - Add badges in Github site
@@ -743,7 +867,7 @@
743
867
  - Add gem and status badges in README. (#42) [Patch by kojix2]
744
868
 
745
869
  - Thanks
746
-
870
+
747
871
  - kojix2
748
872
 
749
873
  ## [0.1.5] - 2022-06-12 (experimental)
@@ -784,7 +908,7 @@
784
908
  - Change to use DataFrame#map_indices in #[]
785
909
 
786
910
  - Add rounding functions with opts (#21)
787
- - With options :mode and :n_digits
911
+ - With options :mode and :n_digits
788
912
  - :n_digits also can be specified with :multiple option in `Vector#round_to_multiple`
789
913
  - `Vector#round`
790
914
  - `Vector#ceil`
@@ -852,7 +976,7 @@
852
976
  - Add example about TDR (#4)
853
977
  - Separate README to create DataFrame and Vector documents (#12)
854
978
  - Add DataFrame model concept image to README (#12)
855
-
979
+
856
980
  - GitHub site
857
981
  - Switched to use merge on GitHub (not to push merged master) (#1)
858
982
  - Create lifetime issue #3 to show the goal of this project (#3)
@@ -877,7 +1001,7 @@
877
1001
 
878
1002
  - `Vector`
879
1003
  - Add categorization functions
880
-
1004
+
881
1005
  This is an important step to support `slice` method and NA treatment features.
882
1006
  - `is_finite`
883
1007
  - `is_inf`
data/Gemfile CHANGED
@@ -7,7 +7,7 @@ gemspec
7
7
  group :test do
8
8
  gem 'rake'
9
9
 
10
- gem 'red-parquet', '~> 11.0.0'
10
+ gem 'red-parquet', '~> 12.0.0'
11
11
  gem 'rover-df', '~> 0.3.0'
12
12
 
13
13
  gem 'rubocop'
@@ -15,14 +15,13 @@ group :test do
15
15
  gem 'rubocop-rake'
16
16
  gem 'rubocop-rubycw', require: false
17
17
 
18
- gem 'iruby'
19
- gem 'test-unit'
20
- gem 'webrick'
21
- gem 'yard'
22
-
23
18
  gem 'benchmark_driver'
19
+ gem 'iruby'
24
20
  gem 'red-arrow-numo-narray'
25
21
  gem 'red-datasets-arrow'
26
22
  gem 'simplecov'
27
23
  gem 'simplecov-json'
24
+ gem 'test-unit'
25
+ gem 'webrick'
26
+ gem 'yard'
28
27
  end