red_amber 0.4.2 → 0.5.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (42) hide show
  1. checksums.yaml +4 -4
  2. data/.devcontainer/Dockerfile +75 -0
  3. data/.devcontainer/devcontainer.json +38 -0
  4. data/.devcontainer/onCreateCommand.sh +22 -0
  5. data/.rubocop.yml +11 -5
  6. data/CHANGELOG.md +141 -17
  7. data/Gemfile +5 -6
  8. data/README.ja.md +271 -0
  9. data/README.md +52 -31
  10. data/Rakefile +55 -0
  11. data/benchmark/group.yml +12 -5
  12. data/doc/Dev_Containers.ja.md +290 -0
  13. data/doc/Dev_Containers.md +292 -0
  14. data/doc/qmd/examples_of_red_amber.qmd +4596 -0
  15. data/doc/qmd/red-amber.qmd +90 -0
  16. data/docker/Dockerfile +2 -2
  17. data/docker/Gemfile +8 -3
  18. data/docker/docker-compose.yml +1 -1
  19. data/docker/readme.md +5 -5
  20. data/lib/red_amber/data_frame.rb +78 -4
  21. data/lib/red_amber/data_frame_combinable.rb +147 -119
  22. data/lib/red_amber/data_frame_displayable.rb +7 -6
  23. data/lib/red_amber/data_frame_loadsave.rb +1 -1
  24. data/lib/red_amber/data_frame_selectable.rb +51 -2
  25. data/lib/red_amber/data_frame_variable_operation.rb +6 -6
  26. data/lib/red_amber/group.rb +476 -127
  27. data/lib/red_amber/helper.rb +26 -0
  28. data/lib/red_amber/subframes.rb +18 -11
  29. data/lib/red_amber/vector.rb +45 -25
  30. data/lib/red_amber/vector_aggregation.rb +26 -0
  31. data/lib/red_amber/vector_selectable.rb +124 -40
  32. data/lib/red_amber/vector_string_function.rb +279 -0
  33. data/lib/red_amber/vector_unary_element_wise.rb +4 -0
  34. data/lib/red_amber/vector_updatable.rb +28 -0
  35. data/lib/red_amber/version.rb +1 -1
  36. data/lib/red_amber.rb +2 -1
  37. data/red_amber.gemspec +3 -3
  38. metadata +19 -14
  39. data/docker/Gemfile.lock +0 -80
  40. data/docker/example +0 -74
  41. data/docker/notebook/examples_of_red_amber.ipynb +0 -8562
  42. data/docker/notebook/red-amber.ipynb +0 -188
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 498f580bc6fc79e36b199cc3ec62c38638e4df903e956a7a78947d9091748d0c
4
- data.tar.gz: 24c3f25ff47b6bba0af26d1a1c77a80a56c2115e0bd76d26ba3f769f71d3557b
3
+ metadata.gz: b921353dbfcaf634a2e026f541caaf914125482c956fa05886f6a542f6ac2e35
4
+ data.tar.gz: 737fae720227e8e3ef36c2c3142bdb7096b051fd51363a2978079e766081d320
5
5
  SHA512:
6
- metadata.gz: '085caa83703f4b9be0a3baae3318f0bcb606bbf264347ad3db431b29fb59c87e05b142ab1fd7e8a0af0a3d64b0b81055f1e85d82414f773d01456cf5a386df25'
7
- data.tar.gz: 901e7eda6560eb2a9ab378d672b39d3ba08ae47012d8465ce541a465d744ce45d93a451ef2bff8fdf838fc1040b12facc58c87d95c6f6bedb89c5c138b95005c
6
+ metadata.gz: 8f45b7c45725b2da1d9459edaa3ae59a2355e771055c281eb612356703b727315b64ed6963ce02493e6c8ef94194e848fc19b1cf4bbd0620b44ba2e24c1953b0
7
+ data.tar.gz: ca9f134d488168f0d2396f55b118fa7db78d2f6969286896ba2139d891a8a09f8a7fde69141c2c226212748c16f93ece4737f65465f9ea0616e500177806c314
@@ -0,0 +1,75 @@
1
+ # [Choice] debian-11, debian-10, ubuntu-22.04, ubuntu-20.04, ubuntu-18.04
2
+ ARG VARIANT=ubuntu-22.04
3
+
4
+ FROM mcr.microsoft.com/devcontainers/base:${VARIANT}
5
+
6
+ # Set env for tracking that we're running in a devcontainer
7
+ ENV DEVCONTAINER=true
8
+
9
+ RUN set -e; \
10
+ apt-get update; \
11
+ apt-get install -y \
12
+ # To build Ruby
13
+ autoconf \
14
+ bison \
15
+ rustc \
16
+ libssl-dev \
17
+ libyaml-dev \
18
+ libreadline6-dev \
19
+ zlib1g-dev \
20
+ libgmp-dev \
21
+ libncurses5-dev \
22
+ libffi-dev \
23
+ libgdbm6 \
24
+ libgdbm-dev \
25
+ libdb-dev \
26
+ uuid-dev \
27
+ # To install IRuby
28
+ libczmq-dev \
29
+ libzmq3-dev
30
+
31
+ # Install Apache Arrow
32
+ ARG APACHE_ARROW_VERSION=12.0.1-1
33
+ ARG arrow_deb_tmp=/tmp/apache-arrow-apt-source-latest.deb
34
+ ARG arrow_apt_source=https://apache.jfrog.io/artifactory/arrow/ubuntu/pool/jammy/main/a/apache-arrow-apt-source/apache-arrow-apt-source_${APACHE_ARROW_VERSION}_all.deb
35
+ RUN set -e; \
36
+ apt-get update; \
37
+ curl -sfSL -o ${arrow_deb_tmp} ${arrow_apt_source}; \
38
+ apt-get install -y --no-install-recommends ${arrow_deb_tmp}; \
39
+ rm -f ${arrow_deb_tmp}; \
40
+ apt-get update; \
41
+ apt-get install -y \
42
+ libarrow-dev \
43
+ libarrow-glib-dev \
44
+ libarrow-dataset-dev \
45
+ libarrow-flight-dev \
46
+ libparquet-dev \
47
+ libparquet-glib-dev \
48
+ libgandiva-dev \
49
+ libgandiva-glib-dev
50
+
51
+ # This Dockerfile adds a non-root user with sudo access. Use the "remoteUser" property in devcontainer.json to use it.
52
+ ARG USERNAME=vscode
53
+ ARG USER_UID=1000
54
+ ARG USER_GID=$USER_UID
55
+
56
+ USER $USERNAME
57
+
58
+ # Install rbenv
59
+ ARG RBENV_RUBY=3.2.2
60
+ RUN set -e; \
61
+ git clone https://github.com/rbenv/rbenv.git $HOME/.rbenv; \
62
+ echo 'eval "$($HOME/.rbenv/bin/rbenv init -)"' >> $HOME/.profile; \
63
+ echo 'eval "$($HOME/.rbenv/bin/rbenv init -)"' >> $HOME/.bashrc; \
64
+ git clone https://github.com/rbenv/ruby-build.git $HOME/.rbenv/plugins/ruby-build
65
+
66
+ # Install Ruby
67
+ # Append `RUBY_CONFIGURE_OPTS=--disable-install-doc ` before rbenv to disable documents
68
+ RUN set -e; \
69
+ $HOME/.rbenv/bin/rbenv install --verbose $RBENV_RUBY; \
70
+ $HOME/.rbenv/bin/rbenv global $RBENV_RUBY
71
+
72
+ # Install IRuby
73
+ RUN set -e; \
74
+ $HOME/.rbenv/bin/rbenv exec gem install iruby; \
75
+ $HOME/.rbenv/bin/rbenv exec iruby register --force
@@ -0,0 +1,38 @@
1
+ {
2
+ "name": "RedAmber",
3
+ "build": {
4
+ "dockerfile": "Dockerfile",
5
+ "cacheFrom": "ghcr.io/red-data-tools/red-amber"
6
+ },
7
+ "features": {
8
+ "ghcr.io/devcontainers/features/python:1": {
9
+ "installTools": true,
10
+ "installJupyterlab": true,
11
+ "version": "3.11"
12
+ },
13
+ // We don't use Ruby feature here
14
+ // "ghcr.io/devcontainers/features/ruby:1": {},
15
+ "ghcr.io/rocker-org/devcontainer-features/quarto-cli:1": {
16
+ "installTinyTex": true,
17
+ "version": "latest"
18
+ },
19
+ "ghcr.io/devcontainers/features/github-cli:1": {}
20
+ },
21
+ // VS Code extensions for Ruby
22
+ "customizations": {
23
+ "vscode": {
24
+ "extensions": [
25
+ "rebornix.Ruby",
26
+ "shopify.ruby-lsp"
27
+ ]
28
+ }
29
+ },
30
+ // Use init process to deal with zombie process
31
+ "init": true,
32
+ // set TZ from local machine's environment defaulting to 'UTC' if not supplied.
33
+ "containerEnv": {
34
+ "RUBYLIB": "/workspaces/red_amber/lib",
35
+ "TZ": "${localEnv:TZ:UTC}"
36
+ },
37
+ "onCreateCommand": ".devcontainer/onCreateCommand.sh"
38
+ }
@@ -0,0 +1,22 @@
1
+ #!/usr/bin/env bash
2
+ set -e
3
+
4
+ # Install language and set timezone
5
+ # Change the settings below if you use another language or timezone
6
+ sudo apt-get update
7
+ sudo apt-get install -y language-pack-ja
8
+
9
+ echo 'export LANG=ja_JP.UTF-8' >> $HOME/.bashrc
10
+ echo 'export LANG=ja_JP.UTF-8' >> $HOME/.profile
11
+ echo 'export TZ=Asia/Tokyo' >> $HOME/.bashrc
12
+ echo 'export TZ=Asia/Tokyo' >> $HOME/.profile
13
+
14
+ # Install HaranoAjiFonts
15
+ mkdir -p $HOME/.fonts
16
+ git clone https://github.com/trueroad/HaranoAjiFonts.git $HOME/.fonts/HaranoAjiFonts
17
+
18
+ # Install gems
19
+ bundle install
20
+
21
+ # Create Jupyter Notebooks
22
+ rake quarto:convert
data/.rubocop.yml CHANGED
@@ -52,7 +52,7 @@ Lint/BinaryOperatorWithIdenticalOperands:
52
52
 
53
53
  Lint/Debugger:
54
54
  Exclude:
55
- - 'docker/example'
55
+ - 'bin/example'
56
56
 
57
57
  # Need for test with empty block
58
58
  # Offense count: 1
@@ -76,7 +76,8 @@ Metrics/AbcSize:
76
76
  Max: 30
77
77
  CountRepeatedAttributes: false
78
78
  AllowedMethods: [
79
- 'join', # 51.87
79
+ 'join_merge_keys', # 54.18
80
+ 'join', # 53.1
80
81
  'dataframe_info', # 46.5
81
82
  'format_table', # 84.62
82
83
  'to_long', # 33.66
@@ -87,6 +88,9 @@ Metrics/AbcSize:
87
88
  '[]', # 33.76
88
89
  'split', # 37.35
89
90
  'aggregate', # 38.13
91
+ 'filters', # 33.91
92
+ 'merge_keys', # 32.17
93
+ 'rename_keys', # 31.64
90
94
  ]
91
95
 
92
96
  # Max: 25
@@ -139,10 +143,12 @@ Metrics/MethodLength:
139
143
  Max: 30
140
144
  AllowedMethods: [
141
145
  'join', # 47
142
- 'dataframe_info', # 33
146
+ 'join_merge_keys', # 41
143
147
  'format_table', # 53
144
148
  'slice_by', # 38
145
149
  'assign_update', # 35
150
+ 'summarize', # 35
151
+ 'dataframe_info', # 33
146
152
  'drop', # 32
147
153
  'aggregate', # 31
148
154
  ]
@@ -219,7 +225,7 @@ Naming/PredicateName:
219
225
  Rubycw/Rubycw:
220
226
  Exclude:
221
227
  - 'test/**/*'
222
- - 'docker/example'
228
+ - 'bin/example'
223
229
 
224
230
  # Offense count: 16
225
231
  # This cop supports safe autocorrection (--autocorrect).
@@ -236,7 +242,7 @@ Style/SlicingWithRange:
236
242
 
237
243
  Style/MixinUsage:
238
244
  Exclude:
239
- - 'docker/example'
245
+ - 'bin/example'
240
246
 
241
247
  # Necessary to Vector < 0 element-wise comparison
242
248
  # Offense count: 5
data/CHANGELOG.md CHANGED
@@ -1,10 +1,134 @@
1
+ ## [0.5.1] - 2023-08-18
2
+
3
+ Docker environment is replaced by Dev Container,
4
+ and Jupyter Notebooks will be created from qmd files.
5
+
6
+ - Breaking change
7
+
8
+ - Bug fixes
9
+ - Fix timestamp test to set TZ locally (#249)
10
+ - Fix regexp for beginning of String (#251)
11
+ - Fix loading bin/Gemfile locally in bin/jupyter script (#261)
12
+
13
+ - New features and improvements
14
+ - Support sort and null_placement options in Vector#rank (#265)
15
+ - Add Vector#find_substring method (#270)
16
+ - Add Group#one method (#274)
17
+ - Add Group#all and #any method (#274)
18
+ - Add Group#median method (#274)
19
+ - Add Group#count_uniq method (#274)
20
+ - Introduce Dev Container environment
21
+ - Introduce Devcontainer environment (#253)
22
+ - Change lifecycle script from postCreate to onCreate (#253)
23
+ - Move example to bin (#253)
24
+ - Fix Python and Ruby versions in Dev Container (#254)
25
+ - Add locale and timezone settings (#256)
26
+ - Add quarto from devcontainer feature (#259)
27
+ - Install HaranoAjiFonts as default Tex font (#259)
28
+
29
+ - Refactoring
30
+ - Rename boolean methods in VectorStringFunction (#263)
31
+ - Refine Vector#inspect to show whether chunked or not (#267)
32
+ - Add an alias Group#count_all for #group_count (#274)
33
+
34
+ - Improve in tests/CI
35
+ - Create rake commands for Notebook convert/test (#269)
36
+ - Fix rubocop warning of forwarding arguments in assign_update (#269)
37
+ - Use rake to start example script (#269)
38
+ - Add test in Vector#rank to cover illegal rank option error (#271)
39
+ - Add bundle install to Rakefile (#276)
40
+ - Use Dockerfile to create dev container (#276)
41
+ - Save image to ghcr in ci (#276)
42
+
43
+ - Documentation and Example
44
+ - YARD
45
+ - Update Docker Environment (#245)
46
+ - Refine jupyter notebook environment (#253)
47
+ - Refine yard in Group aggregations (#274)
48
+ - Fix yard of Vector#rank (#269)
49
+ - Fix yard of Group (#269)
50
+ - Notebook
51
+ - Start source management for jupyter notebook by qmd (#259)
52
+ - Don't create ipynb if it exists (#261)
53
+ - Add Group methods (125 in total) (#269)
54
+ - Add ArrowFunction (126 in total) (#269)
55
+ - Add DataFrame#auto_cast (127 in total) (#269)
56
+ - Update required version in examples notebook (#269)
57
+ - Update examples_of_red_amber (#269)
58
+ - Update red-amber.qmd (#269)
59
+
60
+ - GitHub site
61
+ - Fix broken link in README/README.ja by Viktorius Suwandi (#262)
62
+ - Change description in gemspec (#254)
63
+ - Add documents for Dev Container (#254)
64
+
65
+ - Thanks
66
+ - Viktorius Suwandi
67
+
68
+ ## [0.5.0] - 2023-05-24
69
+
70
+ - Breaking change
71
+ - Use non keyword argument in #sub_by_value (#219)
72
+ - Upgrade dependency to Arrow 12.0.0 (#238)
73
+ - right_join will output columns as same order as Red Arrow.
74
+ - DataFrame#join will not force ordering of original column by default
75
+ - Join with type, such as full_join, sort after join by default
76
+
77
+ - Bug fixes
78
+ - Use truncate in Vector#sample(float) (#229)
79
+ - Support options in DataFrame#tdra (#231)
80
+ - Fix printing table with non-ascii strings (#233)
81
+ - Fix join for Arrow 12.0.0
82
+
83
+ - New features and improvements
84
+ - Add a singleton method Vector.[] (#218)
85
+ - Add an alias #sub_group (#219)
86
+ - Accept Group#summarize{Hash} to rename aggregated columns (#219)
87
+ - Add Group#group_frame (#219)
88
+ - Add Vector#cast (#224)
89
+ - Add Vector#fill_nil(value) (#226)
90
+ - Add Vector#one (#227)
91
+ - Add Vector#mode (#228)
92
+ - Add DataFrame#propagate (#235)
93
+ - Add DataFrame#sample (#237)
94
+ - Add DataFrame#shuffle (#237)
95
+ - Support RankOptions in Vector#rank (#239)
96
+ - Introduce MatchSubstringOptions family in Vector (#241)
97
+ - Introduce Vector#match_substring?
98
+ - Add Vector#end_with?, #start_with? method
99
+ - Add Vector#match_like?
100
+ - Add Vector#count_substring method
101
+
102
+ - Refactoring
103
+ - Refine Group and SubFrames function (#219)
104
+ - Refine Group#group_count
105
+ - Use Acero in Group#filters
106
+ - Refine Group#filters, not using Acero
107
+ - Refine Group#summarize(array)
108
+ - Use Acero for renaming columns in join (#238)
109
+ - Use index kernel with IndexOptions introduced in 12.0.0 (#240)
110
+
111
+ - Improve in tests/CI
112
+ - Use Fedora 39 Rawhide in CI (#238)
113
+
114
+ - Documentation and Example
115
+ - Add missing yard documents for SubFrames::Selectors (#219)
116
+ - Update docker/example (#219)
117
+ - Update Gemfile in docker (#219)
118
+ - Add README.ja.md (#242)
119
+
120
+ - GitHub site
121
+ - Update link of Red Data Tools Chat to matrix (#242)
122
+
123
+ - Thanks
124
+
1
125
  ## [0.4.2] - 2023-04-02
2
126
 
3
127
  - Breaking change
4
128
 
5
129
  - Bug fixes
6
130
  - Fix Vector#modulo, #fdiv, #remainder (#203)
7
-
131
+
8
132
  - New features and improvements
9
133
  - Update SubFrames#take to return SubFrames (#212)
10
134
 
@@ -49,7 +173,7 @@
49
173
  - Fix Vector#rank when data is ChunkedArray (#198)
50
174
  - Fix Vector element-wise functions with nil as scalar (#198)
51
175
  - Support :force_order for all methods of join family (#199)
52
- - Supports :force_order option to force sorting after join for all #join familiy.
176
+ - Supports :force_order option to force sorting after join for all #join familiy.
53
177
  - This will be valuable in some cases such as large dataframes.
54
178
  - Ensure baseframe's schema for SubFrames (#200)
55
179
 
@@ -451,11 +575,11 @@
451
575
  - Move binder support to heronshoes/docker-stacks repository.
452
576
  - Update README notebook on binder.
453
577
  - Add examples_of_RedAmber notebook on binder.
454
-
578
+
455
579
  - Start to use discussions.
456
580
 
457
581
  - Thanks
458
-
582
+
459
583
  - Kenta Murata
460
584
 
461
585
  ## [0.2.1] - 2022-09-07
@@ -504,7 +628,7 @@
504
628
  - Update Jupyter Notebook `71 examples of RedAmber`
505
629
 
506
630
  - Thanks
507
-
631
+
508
632
  - Kenta Murata
509
633
 
510
634
  ## [0.2.0] - 2022-08-15
@@ -519,7 +643,7 @@
519
643
  - Remove optional `require` for rover (#55)
520
644
  Fix DataFrame.new for argument with Rover::DataFrame.
521
645
  - Fix occasional failure in CI (#59)
522
- Sometimes the CI test fails. I added -dev dependency
646
+ Sometimes the CI test fails. I added -dev dependency
523
647
  in Arrow install by apt, not doing in bundler.
524
648
 
525
649
  - Fix calling :take in V#[] (#56)
@@ -537,7 +661,7 @@
537
661
  - Upgrade to Arrow 9.0.0 (#59)
538
662
  - Add Vector#quantile method (#59)
539
663
  Arrow::QuantileOptions has been supported in Arrow GLib 9.0.0 (ARROW-16623, Thanks!)
540
-
664
+
541
665
  - Add Vector#quantiles (#62)
542
666
 
543
667
  - Add DataFrame#each_row (#56)
@@ -548,7 +672,7 @@
548
672
  - Refine DataFrame.new to use pattern match
549
673
  - Use pattern match in DataFrame#assign
550
674
  - Use pattern match in DataFrame#rename
551
-
675
+
552
676
  - Accept Array for renamer/assigner in #rename/#assign (#61)
553
677
  - Accept assigner by Arrays in DataFrame#assign
554
678
  - Accept renamer pairs by Arrays in DataFrame#rename
@@ -563,15 +687,15 @@
563
687
  - Introduce DataFrame#to_wide method
564
688
 
565
689
  - Others
566
-
690
+
567
691
  - Add alias sort_index for array_sort_indices (#59)
568
692
  - Enable :width option in DataFrame#to_s (#62)
569
693
  - Add options to DataFrame#format_table (#62)
570
694
 
571
695
  - Update Documents
572
-
696
+
573
697
  - Add Yard doc for some methods
574
-
698
+
575
699
  - Update Jupyter notebook '61 Examples of Red Amber' (#65)
576
700
 
577
701
  ## [0.1.8] - 2022-08-04 (experimental)
@@ -634,7 +758,7 @@
634
758
  - Show nils.
635
759
  - Show data types.
636
760
  - Refine documents to use new formatter output
637
-
761
+
638
762
  - Simplify options of Vector functions (#46)
639
763
  Vector functions with options use optional argument opt in previous code.
640
764
 
@@ -646,7 +770,7 @@
646
770
  - Add methods to Group
647
771
 
648
772
  - Move parquet and rover to development dependency (#49)
649
-
773
+
650
774
  - Refine text in `DataFrame#to_iruby` (#40)
651
775
 
652
776
  - Add badges in Github site
@@ -743,7 +867,7 @@
743
867
  - Add gem and status badges in README. (#42) [Patch by kojix2]
744
868
 
745
869
  - Thanks
746
-
870
+
747
871
  - kojix2
748
872
 
749
873
  ## [0.1.5] - 2022-06-12 (experimental)
@@ -784,7 +908,7 @@
784
908
  - Change to use DataFrame#map_indices in #[]
785
909
 
786
910
  - Add rounding functions with opts (#21)
787
- - With options :mode and :n_digits
911
+ - With options :mode and :n_digits
788
912
  - :n_digits also can be specified with :multiple option in `Vector#round_to_multiple`
789
913
  - `Vector#round`
790
914
  - `Vector#ceil`
@@ -852,7 +976,7 @@
852
976
  - Add example about TDR (#4)
853
977
  - Separate README to create DataFrame and Vector documents (#12)
854
978
  - Add DataFrame model concept image to README (#12)
855
-
979
+
856
980
  - GitHub site
857
981
  - Switched to use merge on GitHub (not to push merged master) (#1)
858
982
  - Create lifetime issue #3 to show the goal of this project (#3)
@@ -877,7 +1001,7 @@
877
1001
 
878
1002
  - `Vector`
879
1003
  - Add categorization functions
880
-
1004
+
881
1005
  This is an important step to support `slice` method and NA treatment features.
882
1006
  - `is_finite`
883
1007
  - `is_inf`
data/Gemfile CHANGED
@@ -7,7 +7,7 @@ gemspec
7
7
  group :test do
8
8
  gem 'rake'
9
9
 
10
- gem 'red-parquet', '~> 11.0.0'
10
+ gem 'red-parquet', '~> 12.0.0'
11
11
  gem 'rover-df', '~> 0.3.0'
12
12
 
13
13
  gem 'rubocop'
@@ -15,14 +15,13 @@ group :test do
15
15
  gem 'rubocop-rake'
16
16
  gem 'rubocop-rubycw', require: false
17
17
 
18
- gem 'iruby'
19
- gem 'test-unit'
20
- gem 'webrick'
21
- gem 'yard'
22
-
23
18
  gem 'benchmark_driver'
19
+ gem 'iruby'
24
20
  gem 'red-arrow-numo-narray'
25
21
  gem 'red-datasets-arrow'
26
22
  gem 'simplecov'
27
23
  gem 'simplecov-json'
24
+ gem 'test-unit'
25
+ gem 'webrick'
26
+ gem 'yard'
28
27
  end