daru_lite 0.1 → 0.1.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (73) hide show
  1. checksums.yaml +4 -4
  2. data/.github/ISSUE_TEMPLATE/bug_report.md +38 -0
  3. data/.github/ISSUE_TEMPLATE/feature_request.md +20 -0
  4. data/.github/workflows/ci.yml +20 -0
  5. data/.rubocop_todo.yml +35 -33
  6. data/README.md +19 -115
  7. data/daru_lite.gemspec +1 -0
  8. data/lib/daru_lite/data_frame/aggregatable.rb +165 -0
  9. data/lib/daru_lite/data_frame/calculatable.rb +140 -0
  10. data/lib/daru_lite/data_frame/convertible.rb +107 -0
  11. data/lib/daru_lite/data_frame/duplicatable.rb +64 -0
  12. data/lib/daru_lite/data_frame/fetchable.rb +301 -0
  13. data/lib/daru_lite/data_frame/filterable.rb +144 -0
  14. data/lib/daru_lite/data_frame/i_o_able.rb +179 -0
  15. data/lib/daru_lite/data_frame/indexable.rb +168 -0
  16. data/lib/daru_lite/data_frame/iterable.rb +339 -0
  17. data/lib/daru_lite/data_frame/joinable.rb +152 -0
  18. data/lib/daru_lite/data_frame/missable.rb +75 -0
  19. data/lib/daru_lite/data_frame/pivotable.rb +108 -0
  20. data/lib/daru_lite/data_frame/queryable.rb +67 -0
  21. data/lib/daru_lite/data_frame/setable.rb +109 -0
  22. data/lib/daru_lite/data_frame/sortable.rb +241 -0
  23. data/lib/daru_lite/dataframe.rb +142 -2355
  24. data/lib/daru_lite/index/index.rb +13 -0
  25. data/lib/daru_lite/maths/statistics/vector.rb +1 -1
  26. data/lib/daru_lite/vector/aggregatable.rb +9 -0
  27. data/lib/daru_lite/vector/calculatable.rb +78 -0
  28. data/lib/daru_lite/vector/convertible.rb +77 -0
  29. data/lib/daru_lite/vector/duplicatable.rb +17 -0
  30. data/lib/daru_lite/vector/fetchable.rb +175 -0
  31. data/lib/daru_lite/vector/filterable.rb +128 -0
  32. data/lib/daru_lite/vector/indexable.rb +77 -0
  33. data/lib/daru_lite/vector/iterable.rb +95 -0
  34. data/lib/daru_lite/vector/joinable.rb +17 -0
  35. data/lib/daru_lite/vector/missable.rb +124 -0
  36. data/lib/daru_lite/vector/queryable.rb +45 -0
  37. data/lib/daru_lite/vector/setable.rb +47 -0
  38. data/lib/daru_lite/vector/sortable.rb +113 -0
  39. data/lib/daru_lite/vector.rb +36 -932
  40. data/lib/daru_lite/version.rb +1 -1
  41. data/spec/data_frame/aggregatable_example.rb +65 -0
  42. data/spec/data_frame/buildable_example.rb +109 -0
  43. data/spec/data_frame/calculatable_example.rb +135 -0
  44. data/spec/data_frame/convertible_example.rb +180 -0
  45. data/spec/data_frame/duplicatable_example.rb +111 -0
  46. data/spec/data_frame/fetchable_example.rb +476 -0
  47. data/spec/data_frame/filterable_example.rb +250 -0
  48. data/spec/data_frame/indexable_example.rb +221 -0
  49. data/spec/data_frame/iterable_example.rb +465 -0
  50. data/spec/data_frame/joinable_example.rb +106 -0
  51. data/spec/data_frame/missable_example.rb +47 -0
  52. data/spec/data_frame/pivotable_example.rb +297 -0
  53. data/spec/data_frame/queryable_example.rb +92 -0
  54. data/spec/data_frame/setable_example.rb +482 -0
  55. data/spec/data_frame/sortable_example.rb +350 -0
  56. data/spec/dataframe_spec.rb +181 -3243
  57. data/spec/index/index_spec.rb +8 -0
  58. data/spec/vector/aggregatable_example.rb +27 -0
  59. data/spec/vector/calculatable_example.rb +82 -0
  60. data/spec/vector/convertible_example.rb +126 -0
  61. data/spec/vector/duplicatable_example.rb +48 -0
  62. data/spec/vector/fetchable_example.rb +463 -0
  63. data/spec/vector/filterable_example.rb +165 -0
  64. data/spec/vector/indexable_example.rb +201 -0
  65. data/spec/vector/iterable_example.rb +111 -0
  66. data/spec/vector/joinable_example.rb +25 -0
  67. data/spec/vector/missable_example.rb +88 -0
  68. data/spec/vector/queryable_example.rb +91 -0
  69. data/spec/vector/setable_example.rb +300 -0
  70. data/spec/vector/sortable_example.rb +242 -0
  71. data/spec/vector_spec.rb +111 -1805
  72. metadata +102 -3
  73. data/.github/ISSUE_TEMPLATE.md +0 -18
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 37e2e2dcfa3ec410a0633e843d645485cecddb76690373a383e26740375ba658
4
- data.tar.gz: 6f3c87500e54eef1753b604676e44224c8ddd64197ef39e7c1a1d3cdd5d3a33b
3
+ metadata.gz: 1fca8a59ee849230424502a8ffa2f986134ccf522d15d53ab3807c22b64b30f8
4
+ data.tar.gz: 8c4e8048ea8171c463b048ac9dff8b86a8b19e3ec5dd62f16bf72311e7b03b38
5
5
  SHA512:
6
- metadata.gz: f7a617ae7bdeb75e3368dfcfd4cb1f6ba96c31940fe2f94e73fc0ddca5e34754d4cc20855e249d166c72c957067b64c5de4220b79003424c2288ad560bfc4e2c
7
- data.tar.gz: d22eb2e97a63a1a5595b0e70bc6e601861b838ab82742239fd7244d9de5d659afec85d21ed03a8b75c4a7e532b57ae6496147ee74ecb48dfe7f3beb54a231866
6
+ metadata.gz: 403d6cfe869dcd152f083ea0878be37f6a8b40212f6ba5f80ece21bcadf51a4f13471f529bbddcf66b593568f31ec52f3e308c39160f0bd87bac9af6d95b30f6
7
+ data.tar.gz: dfbc2d7b5e63c54980c704c0df3d96ae8d079b921fc0ff51a34f109126a2a382d531457321737e83a2b03bc114b741e3018d0beb9cb00554aa822345d94f3144
@@ -0,0 +1,38 @@
1
+ ---
2
+ name: Bug report
3
+ about: Create a report to help us improve
4
+ title: ''
5
+ labels: ''
6
+ assignees: ''
7
+
8
+ ---
9
+
10
+ **Describe the bug**
11
+ A clear and concise description of what the bug is.
12
+
13
+ **To Reproduce**
14
+ Steps to reproduce the behavior:
15
+ 1. Go to '...'
16
+ 2. Click on '....'
17
+ 3. Scroll down to '....'
18
+ 4. See error
19
+
20
+ **Expected behavior**
21
+ A clear and concise description of what you expected to happen.
22
+
23
+ **Screenshots**
24
+ If applicable, add screenshots to help explain your problem.
25
+
26
+ **Desktop (please complete the following information):**
27
+ - OS: [e.g. iOS]
28
+ - Browser [e.g. chrome, safari]
29
+ - Version [e.g. 22]
30
+
31
+ **Smartphone (please complete the following information):**
32
+ - Device: [e.g. iPhone6]
33
+ - OS: [e.g. iOS8.1]
34
+ - Browser [e.g. stock browser, safari]
35
+ - Version [e.g. 22]
36
+
37
+ **Additional context**
38
+ Add any other context about the problem here.
@@ -0,0 +1,20 @@
1
+ ---
2
+ name: Feature request
3
+ about: Suggest an idea for this project
4
+ title: ''
5
+ labels: ''
6
+ assignees: ''
7
+
8
+ ---
9
+
10
+ **Is your feature request related to a problem? Please describe.**
11
+ A clear and concise description of what the problem is. Ex. I'm always frustrated when [...]
12
+
13
+ **Describe the solution you'd like**
14
+ A clear and concise description of what you want to happen.
15
+
16
+ **Describe alternatives you've considered**
17
+ A clear and concise description of any alternative solutions or features you've considered.
18
+
19
+ **Additional context**
20
+ Add any other context or screenshots about the feature request here.
@@ -1,6 +1,15 @@
1
1
  name: CI
2
2
  on: [push]
3
3
 
4
+ env:
5
+ CC_TEST_REPORTER_ID: ${{secrets.CC_TEST_REPORTER_ID}}
6
+ # `github.ref` points to the *merge commit* when running tests on a pull request, which will be a commit
7
+ # that doesn't exist in our code base. Since this workflow triggers from a PR, we use the HEAD SHA instead.
8
+ #
9
+ # NOTE: These are both used by Code Climate (cc-test-reporter).
10
+ GIT_COMMIT_SHA: ${{github.event.pull_request.head.sha}}
11
+ GIT_BRANCH: ${{github.head_ref}}
12
+
4
13
  jobs:
5
14
  lint:
6
15
  runs-on: ubuntu-latest
@@ -29,5 +38,16 @@ jobs:
29
38
  with:
30
39
  ruby-version: ${{ matrix.ruby-version }}
31
40
  bundler-cache: true # runs 'bundle install' and caches installed gems automatically
41
+ - name: "Download cc-test-reporter from codeclimate.com"
42
+ run: |
43
+ curl -L https://codeclimate.com/downloads/test-reporter/test-reporter-latest-linux-amd64 > ./cc-test-reporter
44
+ chmod +x ./cc-test-reporter
45
+ - name: "Report to Code Climate that we will send a coverage report."
46
+ run: ./cc-test-reporter before-build
32
47
  - name: Run tests
33
48
  run: bundle exec rspec
49
+ - name: Upload code coverage to Code Climate
50
+ run: |
51
+ ./cc-test-reporter after-build \
52
+ --coverage-input-type simplecov \
53
+ ./coverage/.resultset.json
data/.rubocop_todo.yml CHANGED
@@ -1,11 +1,19 @@
1
1
  # This configuration was generated by
2
2
  # `rubocop --auto-gen-config`
3
- # on 2022-08-16 13:20:50 UTC using RuboCop version 1.35.0.
3
+ # on 2024-03-03 13:59:21 UTC using RuboCop version 1.60.2.
4
4
  # The point is for the user to remove these configuration records
5
5
  # one by one as the offenses are removed from the code base.
6
6
  # Note that changes in the inspected code, or installation of new
7
7
  # versions of RuboCop, may require this file to be generated again.
8
8
 
9
+ # Offense count: 1
10
+ # This cop supports safe autocorrection (--autocorrect).
11
+ # Configuration parameters: EnforcedStyle, IndentationWidth.
12
+ # SupportedStyles: aligned, indented
13
+ Layout/LineEndStringConcatenationIndentation:
14
+ Exclude:
15
+ - 'lib/daru_lite/data_frame/indexable.rb'
16
+
9
17
  # Offense count: 1
10
18
  # Configuration parameters: AllowComments.
11
19
  Lint/EmptyClass:
@@ -13,6 +21,7 @@ Lint/EmptyClass:
13
21
  - 'lib/daru_lite/accessors/mdarray_wrapper.rb'
14
22
 
15
23
  # Offense count: 5
24
+ # Configuration parameters: AllowedParentClasses.
16
25
  Lint/MissingSuper:
17
26
  Exclude:
18
27
  - 'lib/daru_lite/date_time/offsets.rb'
@@ -20,61 +29,50 @@ Lint/MissingSuper:
20
29
  - 'lib/daru_lite/index/index.rb'
21
30
  - 'lib/daru_lite/index/multi_index.rb'
22
31
 
23
- # Offense count: 6
32
+ # Offense count: 5
33
+ # This cop supports safe autocorrection (--autocorrect).
24
34
  # Configuration parameters: CheckForMethodsWithNoSideEffects.
25
35
  Lint/Void:
26
36
  Exclude:
27
37
  - 'lib/daru_lite/category.rb'
28
- - 'lib/daru_lite/dataframe.rb'
38
+ - 'lib/daru_lite/data_frame/indexable.rb'
29
39
  - 'lib/daru_lite/vector.rb'
30
40
 
31
- # Offense count: 40
32
- # Configuration parameters: AllowedMethods, AllowedPatterns, IgnoredMethods, CountRepeatedAttributes.
41
+ # Offense count: 41
42
+ # Configuration parameters: AllowedMethods, AllowedPatterns, CountRepeatedAttributes.
33
43
  Metrics/AbcSize:
34
44
  Max: 34
35
45
 
36
- # Offense count: 3
46
+ # Offense count: 5
37
47
  # Configuration parameters: CountComments, CountAsOne.
38
48
  Metrics/ClassLength:
39
- Max: 189
49
+ Max: 188
40
50
 
41
51
  # Offense count: 6
42
- # Configuration parameters: AllowedMethods, AllowedPatterns, IgnoredMethods.
52
+ # Configuration parameters: AllowedMethods, AllowedPatterns.
43
53
  Metrics/CyclomaticComplexity:
44
54
  Max: 9
45
55
 
46
- # Offense count: 61
47
- # Configuration parameters: CountComments, CountAsOne, ExcludedMethods, AllowedMethods, AllowedPatterns, IgnoredMethods.
56
+ # Offense count: 60
57
+ # Configuration parameters: CountComments, CountAsOne, AllowedMethods, AllowedPatterns.
48
58
  Metrics/MethodLength:
49
59
  Max: 15
50
60
 
51
- # Offense count: 2
61
+ # Offense count: 4
52
62
  # Configuration parameters: CountComments, CountAsOne.
53
63
  Metrics/ModuleLength:
54
64
  Max: 190
55
65
 
56
66
  # Offense count: 4
57
- # Configuration parameters: AllowedMethods, AllowedPatterns, IgnoredMethods.
67
+ # Configuration parameters: AllowedMethods, AllowedPatterns.
58
68
  Metrics/PerceivedComplexity:
59
69
  Max: 10
60
70
 
61
- # Offense count: 72
71
+ # Offense count: 66
62
72
  # Configuration parameters: MinNameLength, AllowNamesEndingInNumbers, AllowedNames, ForbiddenNames.
63
- # AllowedNames: at, by, db, id, in, io, ip, of, on, os, pp, to
73
+ # AllowedNames: as, at, by, cc, db, id, if, in, io, ip, of, on, os, pp, to
64
74
  Naming/MethodParameterName:
65
- Exclude:
66
- - 'lib/daru_lite/category.rb'
67
- - 'lib/daru_lite/core/group_by.rb'
68
- - 'lib/daru_lite/core/merge.rb'
69
- - 'lib/daru_lite/core/query.rb'
70
- - 'lib/daru_lite/dataframe.rb'
71
- - 'lib/daru_lite/date_time/index.rb'
72
- - 'lib/daru_lite/date_time/offsets.rb'
73
- - 'lib/daru_lite/extensions/which_dsl.rb'
74
- - 'lib/daru_lite/io/io.rb'
75
- - 'lib/daru_lite/maths/statistics/dataframe.rb'
76
- - 'lib/daru_lite/maths/statistics/vector.rb'
77
- - 'lib/daru_lite/vector.rb'
75
+ Enabled: false
78
76
 
79
77
  # Offense count: 5
80
78
  # Configuration parameters: NamePrefix, ForbiddenPrefixes, AllowedMethods, MethodDefinitionMacros.
@@ -85,13 +83,14 @@ Naming/MethodParameterName:
85
83
  Naming/PredicateName:
86
84
  Exclude:
87
85
  - 'spec/**/*'
88
- - 'lib/daru_lite/dataframe.rb'
86
+ - 'lib/daru_lite/data_frame/missable.rb'
87
+ - 'lib/daru_lite/data_frame/queryable.rb'
89
88
  - 'lib/daru_lite/vector.rb'
90
89
 
91
90
  # Offense count: 5
92
91
  Security/MarshalLoad:
93
92
  Exclude:
94
- - 'lib/daru_lite/dataframe.rb'
93
+ - 'lib/daru_lite/data_frame/i_o_able.rb'
95
94
  - 'lib/daru_lite/date_time/index.rb'
96
95
  - 'lib/daru_lite/index/index.rb'
97
96
  - 'lib/daru_lite/io/io.rb'
@@ -102,7 +101,7 @@ Style/ClassVars:
102
101
  Exclude:
103
102
  - 'lib/daru_lite.rb'
104
103
 
105
- # Offense count: 44
104
+ # Offense count: 58
106
105
  # Configuration parameters: AllowedConstants.
107
106
  Style/Documentation:
108
107
  Enabled: false
@@ -113,6 +112,10 @@ Style/MapToHash:
113
112
  Exclude:
114
113
  - 'lib/daru_lite/category.rb'
115
114
  - 'lib/daru_lite/core/group_by.rb'
115
+ - 'lib/daru_lite/data_frame/convertible.rb'
116
+ - 'lib/daru_lite/data_frame/duplicatable.rb'
117
+ - 'lib/daru_lite/data_frame/fetchable.rb'
118
+ - 'lib/daru_lite/data_frame/joinable.rb'
116
119
  - 'lib/daru_lite/dataframe.rb'
117
120
 
118
121
  # Offense count: 1
@@ -125,7 +128,7 @@ Style/MultilineBlockChain:
125
128
  # AllowedMethods: respond_to_missing?
126
129
  Style/OptionalBooleanParameter:
127
130
  Exclude:
128
- - 'lib/daru_lite/dataframe.rb'
131
+ - 'lib/daru_lite/data_frame/convertible.rb'
129
132
  - 'lib/daru_lite/maths/statistics/vector.rb'
130
133
  - 'lib/daru_lite/vector.rb'
131
134
 
@@ -133,5 +136,4 @@ Style/OptionalBooleanParameter:
133
136
  # This cop supports unsafe autocorrection (--autocorrect-all).
134
137
  Style/RedundantSelfAssignment:
135
138
  Exclude:
136
- - 'lib/daru_lite/dataframe.rb'
137
-
139
+ - 'lib/daru_lite/data_frame/joinable.rb'
data/README.md CHANGED
@@ -1,50 +1,21 @@
1
- # daru - Data Analysis in RUby
1
+ # daru Lite - Data Analysis in RUby Lite
2
2
 
3
- [![Gem Version](https://badge.fury.io/rb/daru.svg)](http://badge.fury.io/rb/daru)
4
- [![Build Status](https://travis-ci.org/SciRuby/daru.svg?branch=master)](https://travis-ci.org/SciRuby/daru)
5
- [![Gitter](https://badges.gitter.im/v0dro/daru.svg)](https://gitter.im/v0dro/daru?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge)
6
- [![Open Source Helpers](https://www.codetriage.com/sciruby/daru/badges/users.svg)](https://www.codetriage.com/sciruby/daru)
3
+ Simple, straightforward DataFrames for Ruby
7
4
 
8
- ## Introduction
9
-
10
- daru (Data Analysis in RUby) is a library for storage, analysis, manipulation and visualization of data in Ruby.
11
-
12
- daru makes it easy and intuitive to process data predominantly through 2 data structures:
13
- `DaruLite::DataFrame` and `DaruLite::Vector`. Written in pure Ruby works with all ruby implementations.
14
- Tested with MRI 2.5.1 and 2.7.1.
15
-
16
- ## daru plugin gems
17
-
18
- - **[daru-view](https://github.com/SciRuby/daru-view)**
19
-
20
- daru-view is for easy and interactive plotting in web application & IRuby
21
- notebook. It can work in any Ruby web application frameworks like Rails, Sinatra, Nanoc and hopefully in others too.
22
-
23
- Articles/Blogs, that summarize powerful features of daru-view:
24
-
25
- * [GSoC 2017 daru-view](http://sciruby.com/blog/2017/09/01/gsoc-2017-data-visualization-using-daru-view/)
26
- * [GSoC 2018 Progress Report](https://github.com/SciRuby/daru-view/wiki/GSoC-2018---Progress-Report)
27
- * [HighCharts Official blog post regarding daru-view](https://www.highcharts.com/blog/post/i-am-ruby-developer-how-can-i-use-highcharts/)
28
-
29
- - **[daru-io](https://github.com/SciRuby/daru-io)**
30
-
31
- This gem extends support for many Import and Export methods of `DaruLite::DataFrame`. This gem is intended to help Rubyists who are into Data Analysis or Web Development, by serving as a general purpose conversion library that takes input in one format (say, JSON) and converts it another format (say, Avro) while also making it incredibly easy to getting started on analyzing data with daru. One can read more in [SciRuby/blog/daru-io](http://sciruby.com/blog/2017/08/29/gsoc-2017-support-to-import-export-of-more-formats/).
5
+ [![Build Status](https://github.com/pollandroll/daru_lite/actions/workflows/build.yml/badge.svg)](https://github.com/pollandroll/daru_lite/actions)
6
+ [![Gem Version](https://img.shields.io/gem/v/daru_lite.svg)](https://rubygems.org/gems/daru_lite)
7
+ [![Maintainability](https://api.codeclimate.com/v1/badges/f87d4ed10b5731e50184/maintainability)](https://codeclimate.com/github/pollandroll/daru_lite/maintainability)
8
+ [![Test Coverage](https://api.codeclimate.com/v1/badges/f87d4ed10b5731e50184/test_coverage)](https://codeclimate.com/github/pollandroll/daru_lite/test_coverage)
32
9
 
10
+ ## Introduction
33
11
 
34
- ## Features
12
+ daru Lite is a library for data analysis and manipulation in Ruby.
35
13
 
36
- * Data structures:
37
- - Vector - A basic 1-D vector.
38
- - DataFrame - A 2-D spreadsheet-like structure for manipulating and storing data sets. This is daru's primary data structure.
39
- * Compatible with [IRuby notebook](https://github.com/SciRuby/iruby), [statsample](https://github.com/SciRuby/statsample), [statsample-glm](https://github.com/SciRuby/statsample-glm) and [statsample-timeseries](https://github.com/SciRuby/statsample-timeseries).
40
- * Support for time series.
41
- * Singly and hierarchically indexed data structures.
42
- * Flexible and intuitive API for manipulation and analysis of data.
43
- * Easy plotting, statistics and arithmetic.
44
- * Plentiful iterators.
45
- * Easy splitting, aggregation and grouping of data.
46
- * Quickly reducing data with pivot tables for quick data summary.
47
- * Import and export data from and to Excel, CSV, SQL Databases, ActiveRecord and plain text files.
14
+ This project started as a fork of [Daru](https://github.com/SciRuby/daru) with the objective to provide:
15
+ - a simple yet powerful interface to manipulate data using DataFrames
16
+ - an API consistent with the one historically provided by daru
17
+ - a focus on the core features around data manipulation, dropping several cumbersome daru dependencies and the associated features: notably N-Matrix, GSL, R, imagemagick and all plotting libraries. The current project has no major dependencies
18
+ - a future-proof library that can safely be used in production
48
19
 
49
20
  ## Installation
50
21
 
@@ -52,62 +23,14 @@ This gem extends support for many Import and Export methods of `DaruLite::DataFr
52
23
  $ gem install daru_lite
53
24
  ```
54
25
 
55
- ## Notebooks
56
-
57
- #### Notebooks on most use cases
58
-
59
- * [Overview of most daru functions](http://nbviewer.ipython.org/github/SciRuby/sciruby-notebooks/blob/master/Data%20Analysis/Daru%20Demo.ipynb)
60
- * [Basic Creation of Vectors and DataFrame](http://nbviewer.ipython.org/github/SciRuby/sciruby-notebooks/blob/master/Data%20Analysis/Creation%20of%20Vector%20and%20DataFrame.ipynb)
61
- * [Detailed Usage of DaruLite::Vector](http://nbviewer.ipython.org/github/SciRuby/sciruby-notebooks/blob/master/Data%20Analysis/Usage%20of%20Vector.ipynb)
62
- * [Detailed Usage of DaruLite::DataFrame](http://nbviewer.ipython.org/github/SciRuby/sciruby-notebooks/blob/master/Data%20Analysis/Usage%20of%20DataFrame.ipynb)
63
- * [Searching and combining data in daru](http://nbviewer.ipython.org/github/SciRuby/sciruby-notebooks/blob/master/Data%20Analysis/Searching%20and%20Combining%20Data.ipynb)
64
- * [Grouping, Splitting and Pivoting Data](http://nbviewer.ipython.org/github/SciRuby/sciruby-notebooks/blob/master/Data%20Analysis/Grouping%2C%20Splitting%20and%20Pivoting.ipynb)
65
- * [Usage of Categorical Data](http://nbviewer.jupyter.org/github/SciRuby/sciruby-notebooks/blob/master/Data%20Analysis/Categorical%20Data/Categorical%20Data.ipynb)
66
-
67
- #### Visualization
68
- * [Visualizing Data With DaruLite::DataFrame](http://nbviewer.ipython.org/github/SciRuby/sciruby-notebooks/blob/master/Visualization/Visualizing%20data%20with%20daru%20DataFrame.ipynb)
69
- * [Plotting using GnuplotRB](http://nbviewer.jupyter.org/github/SciRuby/sciruby-notebooks/blob/master/Data%20Analysis/Plotting/Gnuplotrb.ipynb)
70
- * [Vector plotting with Gruff](http://nbviewer.jupyter.org/github/SciRuby/sciruby-notebooks/blob/master/Data%20Analysis/Plotting/Gruff%20Vector.ipynb)
71
- * [DataFrame plotting with Gruff](http://nbviewer.jupyter.org/github/SciRuby/sciruby-notebooks/blob/master/Data%20Analysis/Plotting/Gruff%20DataFrame.ipynb)
72
-
73
- #### Notebooks on Time series
74
-
75
- * [Basic Time Series](http://nbviewer.ipython.org/github/SciRuby/sciruby-notebooks/blob/master/Data%20Analysis/Basic%20Time%20Series.ipynb)
76
- * [Time Series Analysis and Plotting](http://nbviewer.ipython.org/github/SciRuby/sciruby-notebooks/blob/master/Data%20Analysis/Time%20Series%20Functions.ipynb)
77
-
78
- #### Notebooks on Indexing
79
- * [Indexing in Vector](http://nbviewer.jupyter.org/github/SciRuby/sciruby-notebooks/blob/master/Data%20Analysis/Categorical%20Data/Indexing%20in%20Vector.ipynb)
80
- * [Indexing in DataFrame](http://nbviewer.jupyter.org/github/SciRuby/sciruby-notebooks/blob/master/Data%20Analysis/Categorical%20Data/Indexing%20in%20DataFrame.ipynb)
81
-
82
- ### Case Studies
83
-
84
- * [Logistic Regression Analysis with daru and statsample-glm](http://nbviewer.ipython.org/github/SciRuby/sciruby-notebooks/blob/master/Data%20Analysis/Logistic%20Regression%20with%20daru%20and%20statsample-glm.ipynb)
85
- * [Finding and Plotting most heard artists from a Last.fm dataset](http://nbviewer.ipython.org/github/SciRuby/sciruby-notebooks/blob/master/Data%20Analysis/Finding%20and%20plotting%20the%20most%20heard%20artists%20on%20last%20fm.ipynb)
86
- * [Analyzing baby names with daru](http://nbviewer.ipython.org/github/SciRuby/sciruby-notebooks/blob/master/Data%20Analysis/Analyzing%20baby%20names/Use%20Case%20-%20Daru%20for%20analyzing%20baby%20names%20data.ipynb)
87
- * [Example usage of Categorical Data](http://nbviewer.jupyter.org/github/SciRuby/sciruby-notebooks/blob/master/Data%20Analysis/Categorical%20Data/examples/%5BExample%5D%20Categorical%20Data.ipynb)
88
- * [Example usage of Categorical Index](http://nbviewer.jupyter.org/github/SciRuby/sciruby-notebooks/blob/master/Data%20Analysis/Categorical%20Data/examples/%5BExample%5D%20Categorical%20Index.ipynb)
89
-
90
- ## Blog Posts
91
-
92
- * [Data Analysis in RUby: Basic data manipulation and plotting](http://v0dro.github.io/blog/2014/11/25/data-analysis-in-ruby-basic-data-manipulation-and-plotting/)
93
- * [Data Analysis in RUby: Splitting, sorting, aggregating data and data types](http://v0dro.github.io/blog/2015/02/24/data-analysis-in-ruby-part-2/)
94
- * [Finding and Combining data in daru](http://v0dro.github.io/blog/2015/08/03/finding-and-combining-data-in-daru/)
95
- * [Introduction to analyzing datasets with daru library](http://gafur.me/2018/02/05/analysing-datasets-with-daru-library.html)
96
-
97
- ### Time series
98
-
99
- * [Analysis of Time Series in daru](http://v0dro.github.io/blog/2015/07/31/analysis-of-time-series-in-daru/)
100
- * [Date Offsets in Daru](http://v0dro.github.io/blog/2015/07/27/date-offsets-in-daru/)
101
-
102
- ### Categorical Data
103
-
104
- * [Categorical Index](http://lokeshh.github.io/gsoc2016/blog/2016/06/14/categorical-index/)
105
- * [Categorical Data](http://lokeshh.github.io/gsoc2016/blog/2016/06/21/categorical-data/)
106
- * [Visualization with Categorical Data](http://lokeshh.github.io/gsoc2016/blog/2016/07/02/visualization/)
26
+ or add daru Lite to your Gemfile:
27
+ ```console
28
+ $ bundle add daru_lite
29
+ ```
107
30
 
108
31
  ## Basic Usage
109
32
 
110
- daru exposes two major data structures: `DataFrame` and `Vector`. The Vector is a basic 1-D structure corresponding to a labelled Array, while the `DataFrame` - daru's primary data structure - is 2-D spreadsheet-like structure for manipulating and storing data sets.
33
+ daru Lite exposes two major data structures: `DataFrame` and `Vector`. The Vector is a basic 1-D structure corresponding to a labelled Array, while the `DataFrame` - daru's primary data structure - is a 2-D spreadsheet-like structure for manipulating and storing data sets.
111
34
 
112
35
  Basic DataFrame initialization.
113
36
 
@@ -194,25 +117,6 @@ data_frame.where(
194
117
  ```
195
118
  ![con1](images/con1.png)
196
119
 
197
- *Plotting*
198
-
199
- daru supports plotting out of the box with [gnuplotrb](https://github.com/SciRuby/gnuplotrb).
200
-
201
120
  ## Documentation
202
121
 
203
- Docs can be found [here](http://www.rubydoc.info/gems/daru).
204
-
205
- ## Contributing
206
-
207
- Pick a feature from the Roadmap or the issue tracker or think of your own and send me a Pull Request!
208
-
209
- For details see [CONTRIBUTING](https://github.com/SciRuby/daru/blob/master/CONTRIBUTING.md).
210
-
211
- ## Acknowledgements
212
-
213
- * Google and the Ruby Science Foundation for the Google Summer of Code 2016 grant for speed enhancements and implementation of support for categorical data. Special thanks to [@lokeshh](https://github.com/lokeshh), [@zverok](https://github.com/zverok) and [@agisga](https://github.com/agisga) for their efforts.
214
- * Google and the Ruby Science Foundation for the Google Summer of Code 2015 grant for further developing daru and integrating it with other ruby gems.
215
- * Thank you [last.fm](http://www.last.fm/) for making user data accessible to the public.
216
-
217
- Copyright (c) 2015, Sameer Deshmukh
218
- All rights reserved
122
+ Docs can be found [here](http://www.rubydoc.info/gems/daru_lite).
data/daru_lite.gemspec CHANGED
@@ -47,6 +47,7 @@ Gem::Specification.new do |spec|
47
47
  spec.add_development_dependency 'rubocop-rspec', '~> 2.25'
48
48
  spec.add_development_dependency 'ruby-prof', '~> 1.7.0'
49
49
  spec.add_development_dependency 'simplecov', '~> 0.22.0'
50
+ spec.add_development_dependency 'simplecov_json_formatter', '~> 0.1.4'
50
51
  spec.add_development_dependency 'spreadsheet', '~> 1.3.0'
51
52
  spec.add_development_dependency 'sqlite3', '~> 1.7.2'
52
53
  # issue : https://github.com/SciRuby/daru/issues/493 occurred
@@ -0,0 +1,165 @@
1
+ module DaruLite
2
+ class DataFrame
3
+ module Aggregatable
4
+ # Group elements by vector to perform operations on them. Returns a
5
+ # DaruLite::Core::GroupBy object. See the DaruLite::Core::GroupBy docs for a detailed
6
+ # list of possible operations.
7
+ #
8
+ # == Arguments
9
+ #
10
+ # * vectors - An Array containing names of vectors to group by.
11
+ #
12
+ # == Usage
13
+ #
14
+ # df = DaruLite::DataFrame.new({
15
+ # a: %w{foo bar foo bar foo bar foo foo},
16
+ # b: %w{one one two three two two one three},
17
+ # c: [1 ,2 ,3 ,1 ,3 ,6 ,3 ,8],
18
+ # d: [11 ,22 ,33 ,44 ,55 ,66 ,77 ,88]
19
+ # })
20
+ # df.group_by([:a,:b,:c]).groups
21
+ # #=> {["bar", "one", 2]=>[1],
22
+ # # ["bar", "three", 1]=>[3],
23
+ # # ["bar", "two", 6]=>[5],
24
+ # # ["foo", "one", 1]=>[0],
25
+ # # ["foo", "one", 3]=>[6],
26
+ # # ["foo", "three", 8]=>[7],
27
+ # # ["foo", "two", 3]=>[2, 4]}
28
+ def group_by(*vectors)
29
+ vectors.flatten!
30
+ missing = vectors - @vectors.to_a
31
+ raise(ArgumentError, "Vector(s) missing: #{missing.join(', ')}") unless missing.empty?
32
+
33
+ vectors = [@vectors.first] if vectors.empty?
34
+
35
+ DaruLite::Core::GroupBy.new(self, vectors)
36
+ end
37
+
38
+ # Function to use for aggregating the data.
39
+ #
40
+ # @param options [Hash] options for the columns you want in the resultant dataframe
41
+ #
42
+ # @return [DaruLite::DataFrame]
43
+ #
44
+ # @example
45
+ # df = DaruLite::DataFrame.new(
46
+ # {col: [:a, :b, :c, :d, :e], num: [52,12,07,17,01]})
47
+ # => #<DaruLite::DataFrame(5x2)>
48
+ # col num
49
+ # 0 a 52
50
+ # 1 b 12
51
+ # 2 c 7
52
+ # 3 d 17
53
+ # 4 e 1
54
+ #
55
+ # df.aggregate(num_100_times: ->(df) { (df.num*100).first })
56
+ # => #<DaruLite::DataFrame(5x1)>
57
+ # num_100_ti
58
+ # 0 5200
59
+ # 1 1200
60
+ # 2 700
61
+ # 3 1700
62
+ # 4 100
63
+ #
64
+ # When we have duplicate index :
65
+ #
66
+ # idx = DaruLite::CategoricalIndex.new [:a, :b, :a, :a, :c]
67
+ # df = DaruLite::DataFrame.new({num: [52,12,07,17,01]}, index: idx)
68
+ # => #<DaruLite::DataFrame(5x1)>
69
+ # num
70
+ # a 52
71
+ # b 12
72
+ # a 7
73
+ # a 17
74
+ # c 1
75
+ #
76
+ # df.aggregate(num: :mean)
77
+ # => #<DaruLite::DataFrame(3x1)>
78
+ # num
79
+ # a 25.3333333
80
+ # b 12
81
+ # c 1
82
+ #
83
+ # Note: `GroupBy` class `aggregate` method uses this `aggregate` method
84
+ # internally.
85
+ def aggregate(options = {}, multi_index_level = -1)
86
+ if block_given?
87
+ positions_tuples, new_index = yield(@index) # NOTE: use of yield is private for now
88
+ else
89
+ positions_tuples, new_index = group_index_for_aggregation(@index, multi_index_level)
90
+ end
91
+
92
+ colmn_value = aggregate_by_positions_tuples(options, positions_tuples)
93
+
94
+ DaruLite::DataFrame.new(colmn_value, index: new_index, order: options.keys)
95
+ end
96
+
97
+ def group_by_and_aggregate(*group_by_keys, **aggregation_map)
98
+ group_by(*group_by_keys).aggregate(aggregation_map)
99
+ end
100
+
101
+ private
102
+
103
+ def aggregate_by_positions_tuples(options, positions_tuples)
104
+ agg_over_vectors_only, options = cast_aggregation_options(options)
105
+
106
+ if agg_over_vectors_only
107
+ options.map do |vect_name, method|
108
+ vect = self[vect_name]
109
+
110
+ positions_tuples.map do |positions|
111
+ vect.apply_method_on_sub_vector(method, keys: positions)
112
+ end
113
+ end
114
+ else
115
+ methods = options.values
116
+
117
+ # NOTE: because we aggregate over rows, we don't have to re-get sub-dfs for each method (which is expensive)
118
+ rows = positions_tuples.map do |positions|
119
+ apply_method_on_sub_df(methods, keys: positions)
120
+ end
121
+
122
+ rows.transpose
123
+ end
124
+ end
125
+
126
+ # convert operations over sub-vectors to operations over sub-dfs when it improves perf
127
+ # note: we don't always "cast" because aggregation over a single vector / a few vectors is faster
128
+ # than aggregation over (sub-)dfs
129
+ def cast_aggregation_options(options)
130
+ vects, non_vects = options.keys.partition { |k| @vectors.include?(k) }
131
+
132
+ over_vectors = true
133
+
134
+ if non_vects.any?
135
+ options = options.clone
136
+
137
+ vects.each do |name|
138
+ proc_on_vect = options[name].to_proc
139
+ options[name] = ->(sub_df) { proc_on_vect.call(sub_df[name]) }
140
+ end
141
+
142
+ over_vectors = false
143
+ end
144
+
145
+ [over_vectors, options]
146
+ end
147
+
148
+ def group_index_for_aggregation(index, multi_index_level = -1)
149
+ case index
150
+ when DaruLite::MultiIndex
151
+ groups_by_pos = DaruLite::Core::GroupBy.get_positions_group_for_aggregation(index, multi_index_level)
152
+
153
+ new_index = DaruLite::MultiIndex.from_tuples(groups_by_pos.keys).coerce_index
154
+ pos_tuples = groups_by_pos.values
155
+ when DaruLite::Index, DaruLite::CategoricalIndex
156
+ new_index = Array(index).uniq
157
+ pos_tuples = new_index.map { |idx| [*index.pos(idx)] }
158
+ else raise
159
+ end
160
+
161
+ [pos_tuples, new_index]
162
+ end
163
+ end
164
+ end
165
+ end