daru_lite 0.1 → 0.1.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/ISSUE_TEMPLATE/bug_report.md +38 -0
- data/.github/ISSUE_TEMPLATE/feature_request.md +20 -0
- data/.github/workflows/ci.yml +20 -0
- data/.rubocop_todo.yml +35 -33
- data/README.md +19 -115
- data/daru_lite.gemspec +1 -0
- data/lib/daru_lite/data_frame/aggregatable.rb +165 -0
- data/lib/daru_lite/data_frame/calculatable.rb +140 -0
- data/lib/daru_lite/data_frame/convertible.rb +107 -0
- data/lib/daru_lite/data_frame/duplicatable.rb +64 -0
- data/lib/daru_lite/data_frame/fetchable.rb +301 -0
- data/lib/daru_lite/data_frame/filterable.rb +144 -0
- data/lib/daru_lite/data_frame/i_o_able.rb +179 -0
- data/lib/daru_lite/data_frame/indexable.rb +168 -0
- data/lib/daru_lite/data_frame/iterable.rb +339 -0
- data/lib/daru_lite/data_frame/joinable.rb +152 -0
- data/lib/daru_lite/data_frame/missable.rb +75 -0
- data/lib/daru_lite/data_frame/pivotable.rb +108 -0
- data/lib/daru_lite/data_frame/queryable.rb +67 -0
- data/lib/daru_lite/data_frame/setable.rb +109 -0
- data/lib/daru_lite/data_frame/sortable.rb +241 -0
- data/lib/daru_lite/dataframe.rb +142 -2355
- data/lib/daru_lite/index/index.rb +13 -0
- data/lib/daru_lite/maths/statistics/vector.rb +1 -1
- data/lib/daru_lite/vector/aggregatable.rb +9 -0
- data/lib/daru_lite/vector/calculatable.rb +78 -0
- data/lib/daru_lite/vector/convertible.rb +77 -0
- data/lib/daru_lite/vector/duplicatable.rb +17 -0
- data/lib/daru_lite/vector/fetchable.rb +175 -0
- data/lib/daru_lite/vector/filterable.rb +128 -0
- data/lib/daru_lite/vector/indexable.rb +77 -0
- data/lib/daru_lite/vector/iterable.rb +95 -0
- data/lib/daru_lite/vector/joinable.rb +17 -0
- data/lib/daru_lite/vector/missable.rb +124 -0
- data/lib/daru_lite/vector/queryable.rb +45 -0
- data/lib/daru_lite/vector/setable.rb +47 -0
- data/lib/daru_lite/vector/sortable.rb +113 -0
- data/lib/daru_lite/vector.rb +36 -932
- data/lib/daru_lite/version.rb +1 -1
- data/spec/data_frame/aggregatable_example.rb +65 -0
- data/spec/data_frame/buildable_example.rb +109 -0
- data/spec/data_frame/calculatable_example.rb +135 -0
- data/spec/data_frame/convertible_example.rb +180 -0
- data/spec/data_frame/duplicatable_example.rb +111 -0
- data/spec/data_frame/fetchable_example.rb +476 -0
- data/spec/data_frame/filterable_example.rb +250 -0
- data/spec/data_frame/indexable_example.rb +221 -0
- data/spec/data_frame/iterable_example.rb +465 -0
- data/spec/data_frame/joinable_example.rb +106 -0
- data/spec/data_frame/missable_example.rb +47 -0
- data/spec/data_frame/pivotable_example.rb +297 -0
- data/spec/data_frame/queryable_example.rb +92 -0
- data/spec/data_frame/setable_example.rb +482 -0
- data/spec/data_frame/sortable_example.rb +350 -0
- data/spec/dataframe_spec.rb +181 -3243
- data/spec/index/index_spec.rb +8 -0
- data/spec/vector/aggregatable_example.rb +27 -0
- data/spec/vector/calculatable_example.rb +82 -0
- data/spec/vector/convertible_example.rb +126 -0
- data/spec/vector/duplicatable_example.rb +48 -0
- data/spec/vector/fetchable_example.rb +463 -0
- data/spec/vector/filterable_example.rb +165 -0
- data/spec/vector/indexable_example.rb +201 -0
- data/spec/vector/iterable_example.rb +111 -0
- data/spec/vector/joinable_example.rb +25 -0
- data/spec/vector/missable_example.rb +88 -0
- data/spec/vector/queryable_example.rb +91 -0
- data/spec/vector/setable_example.rb +300 -0
- data/spec/vector/sortable_example.rb +242 -0
- data/spec/vector_spec.rb +111 -1805
- metadata +102 -3
- data/.github/ISSUE_TEMPLATE.md +0 -18
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 1fca8a59ee849230424502a8ffa2f986134ccf522d15d53ab3807c22b64b30f8
|
4
|
+
data.tar.gz: 8c4e8048ea8171c463b048ac9dff8b86a8b19e3ec5dd62f16bf72311e7b03b38
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 403d6cfe869dcd152f083ea0878be37f6a8b40212f6ba5f80ece21bcadf51a4f13471f529bbddcf66b593568f31ec52f3e308c39160f0bd87bac9af6d95b30f6
|
7
|
+
data.tar.gz: dfbc2d7b5e63c54980c704c0df3d96ae8d079b921fc0ff51a34f109126a2a382d531457321737e83a2b03bc114b741e3018d0beb9cb00554aa822345d94f3144
|
@@ -0,0 +1,38 @@
|
|
1
|
+
---
|
2
|
+
name: Bug report
|
3
|
+
about: Create a report to help us improve
|
4
|
+
title: ''
|
5
|
+
labels: ''
|
6
|
+
assignees: ''
|
7
|
+
|
8
|
+
---
|
9
|
+
|
10
|
+
**Describe the bug**
|
11
|
+
A clear and concise description of what the bug is.
|
12
|
+
|
13
|
+
**To Reproduce**
|
14
|
+
Steps to reproduce the behavior:
|
15
|
+
1. Go to '...'
|
16
|
+
2. Click on '....'
|
17
|
+
3. Scroll down to '....'
|
18
|
+
4. See error
|
19
|
+
|
20
|
+
**Expected behavior**
|
21
|
+
A clear and concise description of what you expected to happen.
|
22
|
+
|
23
|
+
**Screenshots**
|
24
|
+
If applicable, add screenshots to help explain your problem.
|
25
|
+
|
26
|
+
**Desktop (please complete the following information):**
|
27
|
+
- OS: [e.g. iOS]
|
28
|
+
- Browser [e.g. chrome, safari]
|
29
|
+
- Version [e.g. 22]
|
30
|
+
|
31
|
+
**Smartphone (please complete the following information):**
|
32
|
+
- Device: [e.g. iPhone6]
|
33
|
+
- OS: [e.g. iOS8.1]
|
34
|
+
- Browser [e.g. stock browser, safari]
|
35
|
+
- Version [e.g. 22]
|
36
|
+
|
37
|
+
**Additional context**
|
38
|
+
Add any other context about the problem here.
|
@@ -0,0 +1,20 @@
|
|
1
|
+
---
|
2
|
+
name: Feature request
|
3
|
+
about: Suggest an idea for this project
|
4
|
+
title: ''
|
5
|
+
labels: ''
|
6
|
+
assignees: ''
|
7
|
+
|
8
|
+
---
|
9
|
+
|
10
|
+
**Is your feature request related to a problem? Please describe.**
|
11
|
+
A clear and concise description of what the problem is. Ex. I'm always frustrated when [...]
|
12
|
+
|
13
|
+
**Describe the solution you'd like**
|
14
|
+
A clear and concise description of what you want to happen.
|
15
|
+
|
16
|
+
**Describe alternatives you've considered**
|
17
|
+
A clear and concise description of any alternative solutions or features you've considered.
|
18
|
+
|
19
|
+
**Additional context**
|
20
|
+
Add any other context or screenshots about the feature request here.
|
data/.github/workflows/ci.yml
CHANGED
@@ -1,6 +1,15 @@
|
|
1
1
|
name: CI
|
2
2
|
on: [push]
|
3
3
|
|
4
|
+
env:
|
5
|
+
CC_TEST_REPORTER_ID: ${{secrets.CC_TEST_REPORTER_ID}}
|
6
|
+
# `github.ref` points to the *merge commit* when running tests on a pull request, which will be a commit
|
7
|
+
# that doesn't exist in our code base. Since this workflow triggers from a PR, we use the HEAD SHA instead.
|
8
|
+
#
|
9
|
+
# NOTE: These are both used by Code Climate (cc-test-reporter).
|
10
|
+
GIT_COMMIT_SHA: ${{github.event.pull_request.head.sha}}
|
11
|
+
GIT_BRANCH: ${{github.head_ref}}
|
12
|
+
|
4
13
|
jobs:
|
5
14
|
lint:
|
6
15
|
runs-on: ubuntu-latest
|
@@ -29,5 +38,16 @@ jobs:
|
|
29
38
|
with:
|
30
39
|
ruby-version: ${{ matrix.ruby-version }}
|
31
40
|
bundler-cache: true # runs 'bundle install' and caches installed gems automatically
|
41
|
+
- name: "Download cc-test-reporter from codeclimate.com"
|
42
|
+
run: |
|
43
|
+
curl -L https://codeclimate.com/downloads/test-reporter/test-reporter-latest-linux-amd64 > ./cc-test-reporter
|
44
|
+
chmod +x ./cc-test-reporter
|
45
|
+
- name: "Report to Code Climate that we will send a coverage report."
|
46
|
+
run: ./cc-test-reporter before-build
|
32
47
|
- name: Run tests
|
33
48
|
run: bundle exec rspec
|
49
|
+
- name: Upload code coverage to Code Climate
|
50
|
+
run: |
|
51
|
+
./cc-test-reporter after-build \
|
52
|
+
--coverage-input-type simplecov \
|
53
|
+
./coverage/.resultset.json
|
data/.rubocop_todo.yml
CHANGED
@@ -1,11 +1,19 @@
|
|
1
1
|
# This configuration was generated by
|
2
2
|
# `rubocop --auto-gen-config`
|
3
|
-
# on
|
3
|
+
# on 2024-03-03 13:59:21 UTC using RuboCop version 1.60.2.
|
4
4
|
# The point is for the user to remove these configuration records
|
5
5
|
# one by one as the offenses are removed from the code base.
|
6
6
|
# Note that changes in the inspected code, or installation of new
|
7
7
|
# versions of RuboCop, may require this file to be generated again.
|
8
8
|
|
9
|
+
# Offense count: 1
|
10
|
+
# This cop supports safe autocorrection (--autocorrect).
|
11
|
+
# Configuration parameters: EnforcedStyle, IndentationWidth.
|
12
|
+
# SupportedStyles: aligned, indented
|
13
|
+
Layout/LineEndStringConcatenationIndentation:
|
14
|
+
Exclude:
|
15
|
+
- 'lib/daru_lite/data_frame/indexable.rb'
|
16
|
+
|
9
17
|
# Offense count: 1
|
10
18
|
# Configuration parameters: AllowComments.
|
11
19
|
Lint/EmptyClass:
|
@@ -13,6 +21,7 @@ Lint/EmptyClass:
|
|
13
21
|
- 'lib/daru_lite/accessors/mdarray_wrapper.rb'
|
14
22
|
|
15
23
|
# Offense count: 5
|
24
|
+
# Configuration parameters: AllowedParentClasses.
|
16
25
|
Lint/MissingSuper:
|
17
26
|
Exclude:
|
18
27
|
- 'lib/daru_lite/date_time/offsets.rb'
|
@@ -20,61 +29,50 @@ Lint/MissingSuper:
|
|
20
29
|
- 'lib/daru_lite/index/index.rb'
|
21
30
|
- 'lib/daru_lite/index/multi_index.rb'
|
22
31
|
|
23
|
-
# Offense count:
|
32
|
+
# Offense count: 5
|
33
|
+
# This cop supports safe autocorrection (--autocorrect).
|
24
34
|
# Configuration parameters: CheckForMethodsWithNoSideEffects.
|
25
35
|
Lint/Void:
|
26
36
|
Exclude:
|
27
37
|
- 'lib/daru_lite/category.rb'
|
28
|
-
- 'lib/daru_lite/
|
38
|
+
- 'lib/daru_lite/data_frame/indexable.rb'
|
29
39
|
- 'lib/daru_lite/vector.rb'
|
30
40
|
|
31
|
-
# Offense count:
|
32
|
-
# Configuration parameters: AllowedMethods, AllowedPatterns,
|
41
|
+
# Offense count: 41
|
42
|
+
# Configuration parameters: AllowedMethods, AllowedPatterns, CountRepeatedAttributes.
|
33
43
|
Metrics/AbcSize:
|
34
44
|
Max: 34
|
35
45
|
|
36
|
-
# Offense count:
|
46
|
+
# Offense count: 5
|
37
47
|
# Configuration parameters: CountComments, CountAsOne.
|
38
48
|
Metrics/ClassLength:
|
39
|
-
Max:
|
49
|
+
Max: 188
|
40
50
|
|
41
51
|
# Offense count: 6
|
42
|
-
# Configuration parameters: AllowedMethods, AllowedPatterns
|
52
|
+
# Configuration parameters: AllowedMethods, AllowedPatterns.
|
43
53
|
Metrics/CyclomaticComplexity:
|
44
54
|
Max: 9
|
45
55
|
|
46
|
-
# Offense count:
|
47
|
-
# Configuration parameters: CountComments, CountAsOne,
|
56
|
+
# Offense count: 60
|
57
|
+
# Configuration parameters: CountComments, CountAsOne, AllowedMethods, AllowedPatterns.
|
48
58
|
Metrics/MethodLength:
|
49
59
|
Max: 15
|
50
60
|
|
51
|
-
# Offense count:
|
61
|
+
# Offense count: 4
|
52
62
|
# Configuration parameters: CountComments, CountAsOne.
|
53
63
|
Metrics/ModuleLength:
|
54
64
|
Max: 190
|
55
65
|
|
56
66
|
# Offense count: 4
|
57
|
-
# Configuration parameters: AllowedMethods, AllowedPatterns
|
67
|
+
# Configuration parameters: AllowedMethods, AllowedPatterns.
|
58
68
|
Metrics/PerceivedComplexity:
|
59
69
|
Max: 10
|
60
70
|
|
61
|
-
# Offense count:
|
71
|
+
# Offense count: 66
|
62
72
|
# Configuration parameters: MinNameLength, AllowNamesEndingInNumbers, AllowedNames, ForbiddenNames.
|
63
|
-
# AllowedNames: at, by, db, id, in, io, ip, of, on, os, pp, to
|
73
|
+
# AllowedNames: as, at, by, cc, db, id, if, in, io, ip, of, on, os, pp, to
|
64
74
|
Naming/MethodParameterName:
|
65
|
-
|
66
|
-
- 'lib/daru_lite/category.rb'
|
67
|
-
- 'lib/daru_lite/core/group_by.rb'
|
68
|
-
- 'lib/daru_lite/core/merge.rb'
|
69
|
-
- 'lib/daru_lite/core/query.rb'
|
70
|
-
- 'lib/daru_lite/dataframe.rb'
|
71
|
-
- 'lib/daru_lite/date_time/index.rb'
|
72
|
-
- 'lib/daru_lite/date_time/offsets.rb'
|
73
|
-
- 'lib/daru_lite/extensions/which_dsl.rb'
|
74
|
-
- 'lib/daru_lite/io/io.rb'
|
75
|
-
- 'lib/daru_lite/maths/statistics/dataframe.rb'
|
76
|
-
- 'lib/daru_lite/maths/statistics/vector.rb'
|
77
|
-
- 'lib/daru_lite/vector.rb'
|
75
|
+
Enabled: false
|
78
76
|
|
79
77
|
# Offense count: 5
|
80
78
|
# Configuration parameters: NamePrefix, ForbiddenPrefixes, AllowedMethods, MethodDefinitionMacros.
|
@@ -85,13 +83,14 @@ Naming/MethodParameterName:
|
|
85
83
|
Naming/PredicateName:
|
86
84
|
Exclude:
|
87
85
|
- 'spec/**/*'
|
88
|
-
- 'lib/daru_lite/
|
86
|
+
- 'lib/daru_lite/data_frame/missable.rb'
|
87
|
+
- 'lib/daru_lite/data_frame/queryable.rb'
|
89
88
|
- 'lib/daru_lite/vector.rb'
|
90
89
|
|
91
90
|
# Offense count: 5
|
92
91
|
Security/MarshalLoad:
|
93
92
|
Exclude:
|
94
|
-
- 'lib/daru_lite/
|
93
|
+
- 'lib/daru_lite/data_frame/i_o_able.rb'
|
95
94
|
- 'lib/daru_lite/date_time/index.rb'
|
96
95
|
- 'lib/daru_lite/index/index.rb'
|
97
96
|
- 'lib/daru_lite/io/io.rb'
|
@@ -102,7 +101,7 @@ Style/ClassVars:
|
|
102
101
|
Exclude:
|
103
102
|
- 'lib/daru_lite.rb'
|
104
103
|
|
105
|
-
# Offense count:
|
104
|
+
# Offense count: 58
|
106
105
|
# Configuration parameters: AllowedConstants.
|
107
106
|
Style/Documentation:
|
108
107
|
Enabled: false
|
@@ -113,6 +112,10 @@ Style/MapToHash:
|
|
113
112
|
Exclude:
|
114
113
|
- 'lib/daru_lite/category.rb'
|
115
114
|
- 'lib/daru_lite/core/group_by.rb'
|
115
|
+
- 'lib/daru_lite/data_frame/convertible.rb'
|
116
|
+
- 'lib/daru_lite/data_frame/duplicatable.rb'
|
117
|
+
- 'lib/daru_lite/data_frame/fetchable.rb'
|
118
|
+
- 'lib/daru_lite/data_frame/joinable.rb'
|
116
119
|
- 'lib/daru_lite/dataframe.rb'
|
117
120
|
|
118
121
|
# Offense count: 1
|
@@ -125,7 +128,7 @@ Style/MultilineBlockChain:
|
|
125
128
|
# AllowedMethods: respond_to_missing?
|
126
129
|
Style/OptionalBooleanParameter:
|
127
130
|
Exclude:
|
128
|
-
- 'lib/daru_lite/
|
131
|
+
- 'lib/daru_lite/data_frame/convertible.rb'
|
129
132
|
- 'lib/daru_lite/maths/statistics/vector.rb'
|
130
133
|
- 'lib/daru_lite/vector.rb'
|
131
134
|
|
@@ -133,5 +136,4 @@ Style/OptionalBooleanParameter:
|
|
133
136
|
# This cop supports unsafe autocorrection (--autocorrect-all).
|
134
137
|
Style/RedundantSelfAssignment:
|
135
138
|
Exclude:
|
136
|
-
- 'lib/daru_lite/
|
137
|
-
|
139
|
+
- 'lib/daru_lite/data_frame/joinable.rb'
|
data/README.md
CHANGED
@@ -1,50 +1,21 @@
|
|
1
|
-
# daru - Data Analysis in RUby
|
1
|
+
# daru Lite - Data Analysis in RUby Lite
|
2
2
|
|
3
|
-
|
4
|
-
[![Build Status](https://travis-ci.org/SciRuby/daru.svg?branch=master)](https://travis-ci.org/SciRuby/daru)
|
5
|
-
[![Gitter](https://badges.gitter.im/v0dro/daru.svg)](https://gitter.im/v0dro/daru?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge)
|
6
|
-
[![Open Source Helpers](https://www.codetriage.com/sciruby/daru/badges/users.svg)](https://www.codetriage.com/sciruby/daru)
|
3
|
+
Simple, straightforward DataFrames for Ruby
|
7
4
|
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
daru makes it easy and intuitive to process data predominantly through 2 data structures:
|
13
|
-
`DaruLite::DataFrame` and `DaruLite::Vector`. Written in pure Ruby works with all ruby implementations.
|
14
|
-
Tested with MRI 2.5.1 and 2.7.1.
|
15
|
-
|
16
|
-
## daru plugin gems
|
17
|
-
|
18
|
-
- **[daru-view](https://github.com/SciRuby/daru-view)**
|
19
|
-
|
20
|
-
daru-view is for easy and interactive plotting in web application & IRuby
|
21
|
-
notebook. It can work in any Ruby web application frameworks like Rails, Sinatra, Nanoc and hopefully in others too.
|
22
|
-
|
23
|
-
Articles/Blogs, that summarize powerful features of daru-view:
|
24
|
-
|
25
|
-
* [GSoC 2017 daru-view](http://sciruby.com/blog/2017/09/01/gsoc-2017-data-visualization-using-daru-view/)
|
26
|
-
* [GSoC 2018 Progress Report](https://github.com/SciRuby/daru-view/wiki/GSoC-2018---Progress-Report)
|
27
|
-
* [HighCharts Official blog post regarding daru-view](https://www.highcharts.com/blog/post/i-am-ruby-developer-how-can-i-use-highcharts/)
|
28
|
-
|
29
|
-
- **[daru-io](https://github.com/SciRuby/daru-io)**
|
30
|
-
|
31
|
-
This gem extends support for many Import and Export methods of `DaruLite::DataFrame`. This gem is intended to help Rubyists who are into Data Analysis or Web Development, by serving as a general purpose conversion library that takes input in one format (say, JSON) and converts it another format (say, Avro) while also making it incredibly easy to getting started on analyzing data with daru. One can read more in [SciRuby/blog/daru-io](http://sciruby.com/blog/2017/08/29/gsoc-2017-support-to-import-export-of-more-formats/).
|
5
|
+
[![Build Status](https://github.com/pollandroll/daru_lite/actions/workflows/build.yml/badge.svg)](https://github.com/pollandroll/daru_lite/actions)
|
6
|
+
[![Gem Version](https://img.shields.io/gem/v/daru_lite.svg)](https://rubygems.org/gems/daru_lite)
|
7
|
+
[![Maintainability](https://api.codeclimate.com/v1/badges/f87d4ed10b5731e50184/maintainability)](https://codeclimate.com/github/pollandroll/daru_lite/maintainability)
|
8
|
+
[![Test Coverage](https://api.codeclimate.com/v1/badges/f87d4ed10b5731e50184/test_coverage)](https://codeclimate.com/github/pollandroll/daru_lite/test_coverage)
|
32
9
|
|
10
|
+
## Introduction
|
33
11
|
|
34
|
-
|
12
|
+
daru Lite is a library for data analysis and manipulation in Ruby.
|
35
13
|
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
* Singly and hierarchically indexed data structures.
|
42
|
-
* Flexible and intuitive API for manipulation and analysis of data.
|
43
|
-
* Easy plotting, statistics and arithmetic.
|
44
|
-
* Plentiful iterators.
|
45
|
-
* Easy splitting, aggregation and grouping of data.
|
46
|
-
* Quickly reducing data with pivot tables for quick data summary.
|
47
|
-
* Import and export data from and to Excel, CSV, SQL Databases, ActiveRecord and plain text files.
|
14
|
+
This project started as a fork of [Daru](https://github.com/SciRuby/daru) with the objective to provide:
|
15
|
+
- a simple yet powerful interface to manipulate data using DataFrames
|
16
|
+
- an API consistent with the one historically provided by daru
|
17
|
+
- a focus on the core features around data manipulation: several cumbersome daru dependencies and their associated features were dropped, notably N-Matrix, GSL, R, imagemagick and all plotting libraries. The current project has no major dependencies
|
18
|
+
- a future-proof library that can safely be used in production
|
48
19
|
|
49
20
|
## Installation
|
50
21
|
|
@@ -52,62 +23,14 @@ This gem extends support for many Import and Export methods of `DaruLite::DataFr
|
|
52
23
|
$ gem install daru_lite
|
53
24
|
```
|
54
25
|
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
* [Overview of most daru functions](http://nbviewer.ipython.org/github/SciRuby/sciruby-notebooks/blob/master/Data%20Analysis/Daru%20Demo.ipynb)
|
60
|
-
* [Basic Creation of Vectors and DataFrame](http://nbviewer.ipython.org/github/SciRuby/sciruby-notebooks/blob/master/Data%20Analysis/Creation%20of%20Vector%20and%20DataFrame.ipynb)
|
61
|
-
* [Detailed Usage of DaruLite::Vector](http://nbviewer.ipython.org/github/SciRuby/sciruby-notebooks/blob/master/Data%20Analysis/Usage%20of%20Vector.ipynb)
|
62
|
-
* [Detailed Usage of DaruLite::DataFrame](http://nbviewer.ipython.org/github/SciRuby/sciruby-notebooks/blob/master/Data%20Analysis/Usage%20of%20DataFrame.ipynb)
|
63
|
-
* [Searching and combining data in daru](http://nbviewer.ipython.org/github/SciRuby/sciruby-notebooks/blob/master/Data%20Analysis/Searching%20and%20Combining%20Data.ipynb)
|
64
|
-
* [Grouping, Splitting and Pivoting Data](http://nbviewer.ipython.org/github/SciRuby/sciruby-notebooks/blob/master/Data%20Analysis/Grouping%2C%20Splitting%20and%20Pivoting.ipynb)
|
65
|
-
* [Usage of Categorical Data](http://nbviewer.jupyter.org/github/SciRuby/sciruby-notebooks/blob/master/Data%20Analysis/Categorical%20Data/Categorical%20Data.ipynb)
|
66
|
-
|
67
|
-
#### Visualization
|
68
|
-
* [Visualizing Data With DaruLite::DataFrame](http://nbviewer.ipython.org/github/SciRuby/sciruby-notebooks/blob/master/Visualization/Visualizing%20data%20with%20daru%20DataFrame.ipynb)
|
69
|
-
* [Plotting using GnuplotRB](http://nbviewer.jupyter.org/github/SciRuby/sciruby-notebooks/blob/master/Data%20Analysis/Plotting/Gnuplotrb.ipynb)
|
70
|
-
* [Vector plotting with Gruff](http://nbviewer.jupyter.org/github/SciRuby/sciruby-notebooks/blob/master/Data%20Analysis/Plotting/Gruff%20Vector.ipynb)
|
71
|
-
* [DataFrame plotting with Gruff](http://nbviewer.jupyter.org/github/SciRuby/sciruby-notebooks/blob/master/Data%20Analysis/Plotting/Gruff%20DataFrame.ipynb)
|
72
|
-
|
73
|
-
#### Notebooks on Time series
|
74
|
-
|
75
|
-
* [Basic Time Series](http://nbviewer.ipython.org/github/SciRuby/sciruby-notebooks/blob/master/Data%20Analysis/Basic%20Time%20Series.ipynb)
|
76
|
-
* [Time Series Analysis and Plotting](http://nbviewer.ipython.org/github/SciRuby/sciruby-notebooks/blob/master/Data%20Analysis/Time%20Series%20Functions.ipynb)
|
77
|
-
|
78
|
-
#### Notebooks on Indexing
|
79
|
-
* [Indexing in Vector](http://nbviewer.jupyter.org/github/SciRuby/sciruby-notebooks/blob/master/Data%20Analysis/Categorical%20Data/Indexing%20in%20Vector.ipynb)
|
80
|
-
* [Indexing in DataFrame](http://nbviewer.jupyter.org/github/SciRuby/sciruby-notebooks/blob/master/Data%20Analysis/Categorical%20Data/Indexing%20in%20DataFrame.ipynb)
|
81
|
-
|
82
|
-
### Case Studies
|
83
|
-
|
84
|
-
* [Logistic Regression Analysis with daru and statsample-glm](http://nbviewer.ipython.org/github/SciRuby/sciruby-notebooks/blob/master/Data%20Analysis/Logistic%20Regression%20with%20daru%20and%20statsample-glm.ipynb)
|
85
|
-
* [Finding and Plotting most heard artists from a Last.fm dataset](http://nbviewer.ipython.org/github/SciRuby/sciruby-notebooks/blob/master/Data%20Analysis/Finding%20and%20plotting%20the%20most%20heard%20artists%20on%20last%20fm.ipynb)
|
86
|
-
* [Analyzing baby names with daru](http://nbviewer.ipython.org/github/SciRuby/sciruby-notebooks/blob/master/Data%20Analysis/Analyzing%20baby%20names/Use%20Case%20-%20Daru%20for%20analyzing%20baby%20names%20data.ipynb)
|
87
|
-
* [Example usage of Categorical Data](http://nbviewer.jupyter.org/github/SciRuby/sciruby-notebooks/blob/master/Data%20Analysis/Categorical%20Data/examples/%5BExample%5D%20Categorical%20Data.ipynb)
|
88
|
-
* [Example usage of Categorical Index](http://nbviewer.jupyter.org/github/SciRuby/sciruby-notebooks/blob/master/Data%20Analysis/Categorical%20Data/examples/%5BExample%5D%20Categorical%20Index.ipynb)
|
89
|
-
|
90
|
-
## Blog Posts
|
91
|
-
|
92
|
-
* [Data Analysis in RUby: Basic data manipulation and plotting](http://v0dro.github.io/blog/2014/11/25/data-analysis-in-ruby-basic-data-manipulation-and-plotting/)
|
93
|
-
* [Data Analysis in RUby: Splitting, sorting, aggregating data and data types](http://v0dro.github.io/blog/2015/02/24/data-analysis-in-ruby-part-2/)
|
94
|
-
* [Finding and Combining data in daru](http://v0dro.github.io/blog/2015/08/03/finding-and-combining-data-in-daru/)
|
95
|
-
* [Introduction to analyzing datasets with daru library](http://gafur.me/2018/02/05/analysing-datasets-with-daru-library.html)
|
96
|
-
|
97
|
-
### Time series
|
98
|
-
|
99
|
-
* [Analysis of Time Series in daru](http://v0dro.github.io/blog/2015/07/31/analysis-of-time-series-in-daru/)
|
100
|
-
* [Date Offsets in Daru](http://v0dro.github.io/blog/2015/07/27/date-offsets-in-daru/)
|
101
|
-
|
102
|
-
### Categorical Data
|
103
|
-
|
104
|
-
* [Categorical Index](http://lokeshh.github.io/gsoc2016/blog/2016/06/14/categorical-index/)
|
105
|
-
* [Categorical Data](http://lokeshh.github.io/gsoc2016/blog/2016/06/21/categorical-data/)
|
106
|
-
* [Visualization with Categorical Data](http://lokeshh.github.io/gsoc2016/blog/2016/07/02/visualization/)
|
26
|
+
or add daru Lite to your Gemfile:
|
27
|
+
```console
|
28
|
+
$ bundle add daru_lite
|
29
|
+
```
|
107
30
|
|
108
31
|
## Basic Usage
|
109
32
|
|
110
|
-
daru exposes two major data structures: `DataFrame` and `Vector`. The Vector is a basic 1-D structure corresponding to a labelled Array, while the `DataFrame` - daru's primary data structure - is 2-D spreadsheet-like structure for manipulating and storing data sets.
|
33
|
+
daru Lite exposes two major data structures: `DataFrame` and `Vector`. The Vector is a basic 1-D structure corresponding to a labelled Array, while the `DataFrame` - daru's primary data structure - is a 2-D spreadsheet-like structure for manipulating and storing data sets.
|
111
34
|
|
112
35
|
Basic DataFrame initialization.
|
113
36
|
|
@@ -194,25 +117,6 @@ data_frame.where(
|
|
194
117
|
```
|
195
118
|
![con1](images/con1.png)
|
196
119
|
|
197
|
-
*Plotting*
|
198
|
-
|
199
|
-
daru supports plotting out of the box with [gnuplotrb](https://github.com/SciRuby/gnuplotrb).
|
200
|
-
|
201
120
|
## Documentation
|
202
121
|
|
203
|
-
Docs can be found [here](http://www.rubydoc.info/gems/
|
204
|
-
|
205
|
-
## Contributing
|
206
|
-
|
207
|
-
Pick a feature from the Roadmap or the issue tracker or think of your own and send me a Pull Request!
|
208
|
-
|
209
|
-
For details see [CONTRIBUTING](https://github.com/SciRuby/daru/blob/master/CONTRIBUTING.md).
|
210
|
-
|
211
|
-
## Acknowledgements
|
212
|
-
|
213
|
-
* Google and the Ruby Science Foundation for the Google Summer of Code 2016 grant for speed enhancements and implementation of support for categorical data. Special thanks to [@lokeshh](https://github.com/lokeshh), [@zverok](https://github.com/zverok) and [@agisga](https://github.com/agisga) for their efforts.
|
214
|
-
* Google and the Ruby Science Foundation for the Google Summer of Code 2015 grant for further developing daru and integrating it with other ruby gems.
|
215
|
-
* Thank you [last.fm](http://www.last.fm/) for making user data accessible to the public.
|
216
|
-
|
217
|
-
Copyright (c) 2015, Sameer Deshmukh
|
218
|
-
All rights reserved
|
122
|
+
Docs can be found [here](http://www.rubydoc.info/gems/daru_lite).
|
data/daru_lite.gemspec
CHANGED
@@ -47,6 +47,7 @@ Gem::Specification.new do |spec|
|
|
47
47
|
spec.add_development_dependency 'rubocop-rspec', '~> 2.25'
|
48
48
|
spec.add_development_dependency 'ruby-prof', '~> 1.7.0'
|
49
49
|
spec.add_development_dependency 'simplecov', '~> 0.22.0'
|
50
|
+
spec.add_development_dependency 'simplecov_json_formatter', '~> 0.1.4'
|
50
51
|
spec.add_development_dependency 'spreadsheet', '~> 1.3.0'
|
51
52
|
spec.add_development_dependency 'sqlite3', '~> 1.7.2'
|
52
53
|
# issue : https://github.com/SciRuby/daru/issues/493 occurred
|
@@ -0,0 +1,165 @@
|
|
1
|
+
module DaruLite
|
2
|
+
class DataFrame
|
3
|
+
module Aggregatable
|
4
|
+
# Group elements by vector to perform operations on them. Returns a
|
5
|
+
# DaruLite::Core::GroupBy object. See the DaruLite::Core::GroupBy docs for a detailed
|
6
|
+
# list of possible operations.
|
7
|
+
#
|
8
|
+
# == Arguments
|
9
|
+
#
|
10
|
+
# * vectors - An Array containing names of vectors to group by.
|
11
|
+
#
|
12
|
+
# == Usage
|
13
|
+
#
|
14
|
+
# df = DaruLite::DataFrame.new({
|
15
|
+
# a: %w{foo bar foo bar foo bar foo foo},
|
16
|
+
# b: %w{one one two three two two one three},
|
17
|
+
# c: [1 ,2 ,3 ,1 ,3 ,6 ,3 ,8],
|
18
|
+
# d: [11 ,22 ,33 ,44 ,55 ,66 ,77 ,88]
|
19
|
+
# })
|
20
|
+
# df.group_by([:a,:b,:c]).groups
|
21
|
+
# #=> {["bar", "one", 2]=>[1],
|
22
|
+
# # ["bar", "three", 1]=>[3],
|
23
|
+
# # ["bar", "two", 6]=>[5],
|
24
|
+
# # ["foo", "one", 1]=>[0],
|
25
|
+
# # ["foo", "one", 3]=>[6],
|
26
|
+
# # ["foo", "three", 8]=>[7],
|
27
|
+
# # ["foo", "two", 3]=>[2, 4]}
|
28
|
+
def group_by(*vectors)
|
29
|
+
vectors.flatten!
|
30
|
+
missing = vectors - @vectors.to_a
|
31
|
+
raise(ArgumentError, "Vector(s) missing: #{missing.join(', ')}") unless missing.empty?
|
32
|
+
|
33
|
+
vectors = [@vectors.first] if vectors.empty?
|
34
|
+
|
35
|
+
DaruLite::Core::GroupBy.new(self, vectors)
|
36
|
+
end
|
37
|
+
|
38
|
+
# Function to use for aggregating the data.
|
39
|
+
#
|
40
|
+
# @param options [Hash] maps each column wanted in the resultant dataframe to the aggregation (method name or proc) that produces it
|
41
|
+
#
|
42
|
+
# @return [DaruLite::DataFrame]
|
43
|
+
#
|
44
|
+
# @example
|
45
|
+
# df = DaruLite::DataFrame.new(
|
46
|
+
# {col: [:a, :b, :c, :d, :e], num: [52,12,07,17,01]})
|
47
|
+
# => #<DaruLite::DataFrame(5x2)>
|
48
|
+
# col num
|
49
|
+
# 0 a 52
|
50
|
+
# 1 b 12
|
51
|
+
# 2 c 7
|
52
|
+
# 3 d 17
|
53
|
+
# 4 e 1
|
54
|
+
#
|
55
|
+
# df.aggregate(num_100_times: ->(df) { (df.num*100).first })
|
56
|
+
# => #<DaruLite::DataFrame(5x1)>
|
57
|
+
# num_100_ti
|
58
|
+
# 0 5200
|
59
|
+
# 1 1200
|
60
|
+
# 2 700
|
61
|
+
# 3 1700
|
62
|
+
# 4 100
|
63
|
+
#
|
64
|
+
# When we have duplicate index :
|
65
|
+
#
|
66
|
+
# idx = DaruLite::CategoricalIndex.new [:a, :b, :a, :a, :c]
|
67
|
+
# df = DaruLite::DataFrame.new({num: [52,12,07,17,01]}, index: idx)
|
68
|
+
# => #<DaruLite::DataFrame(5x1)>
|
69
|
+
# num
|
70
|
+
# a 52
|
71
|
+
# b 12
|
72
|
+
# a 7
|
73
|
+
# a 17
|
74
|
+
# c 1
|
75
|
+
#
|
76
|
+
# df.aggregate(num: :mean)
|
77
|
+
# => #<DaruLite::DataFrame(3x1)>
|
78
|
+
# num
|
79
|
+
# a 25.3333333
|
80
|
+
# b 12
|
81
|
+
# c 1
|
82
|
+
#
|
83
|
+
# Note: `GroupBy` class `aggregate` method uses this `aggregate` method
|
84
|
+
# internally.
|
85
|
+
def aggregate(options = {}, multi_index_level = -1)
|
86
|
+
if block_given?
|
87
|
+
positions_tuples, new_index = yield(@index) # NOTE: use of yield is private for now
|
88
|
+
else
|
89
|
+
positions_tuples, new_index = group_index_for_aggregation(@index, multi_index_level)
|
90
|
+
end
|
91
|
+
|
92
|
+
colmn_value = aggregate_by_positions_tuples(options, positions_tuples)
|
93
|
+
|
94
|
+
DaruLite::DataFrame.new(colmn_value, index: new_index, order: options.keys)
|
95
|
+
end
|
96
|
+
|
97
|
+
def group_by_and_aggregate(*group_by_keys, **aggregation_map)
|
98
|
+
group_by(*group_by_keys).aggregate(aggregation_map)
|
99
|
+
end
|
100
|
+
|
101
|
+
private
|
102
|
+
|
103
|
+
def aggregate_by_positions_tuples(options, positions_tuples)
|
104
|
+
agg_over_vectors_only, options = cast_aggregation_options(options)
|
105
|
+
|
106
|
+
if agg_over_vectors_only
|
107
|
+
options.map do |vect_name, method|
|
108
|
+
vect = self[vect_name]
|
109
|
+
|
110
|
+
positions_tuples.map do |positions|
|
111
|
+
vect.apply_method_on_sub_vector(method, keys: positions)
|
112
|
+
end
|
113
|
+
end
|
114
|
+
else
|
115
|
+
methods = options.values
|
116
|
+
|
117
|
+
# NOTE: because we aggregate over rows, we don't have to re-get sub-dfs for each method (which is expensive)
|
118
|
+
rows = positions_tuples.map do |positions|
|
119
|
+
apply_method_on_sub_df(methods, keys: positions)
|
120
|
+
end
|
121
|
+
|
122
|
+
rows.transpose
|
123
|
+
end
|
124
|
+
end
|
125
|
+
|
126
|
+
# convert operations over sub-vectors to operations over sub-dfs when it improves perf
|
127
|
+
# note: we don't always "cast" because aggregation over a single vector / a few vectors is faster
|
128
|
+
# than aggregation over (sub-)dfs
|
129
|
+
def cast_aggregation_options(options)
|
130
|
+
vects, non_vects = options.keys.partition { |k| @vectors.include?(k) }
|
131
|
+
|
132
|
+
over_vectors = true
|
133
|
+
|
134
|
+
if non_vects.any?
|
135
|
+
options = options.clone
|
136
|
+
|
137
|
+
vects.each do |name|
|
138
|
+
proc_on_vect = options[name].to_proc
|
139
|
+
options[name] = ->(sub_df) { proc_on_vect.call(sub_df[name]) }
|
140
|
+
end
|
141
|
+
|
142
|
+
over_vectors = false
|
143
|
+
end
|
144
|
+
|
145
|
+
[over_vectors, options]
|
146
|
+
end
|
147
|
+
|
148
|
+
def group_index_for_aggregation(index, multi_index_level = -1)
|
149
|
+
case index
|
150
|
+
when DaruLite::MultiIndex
|
151
|
+
groups_by_pos = DaruLite::Core::GroupBy.get_positions_group_for_aggregation(index, multi_index_level)
|
152
|
+
|
153
|
+
new_index = DaruLite::MultiIndex.from_tuples(groups_by_pos.keys).coerce_index
|
154
|
+
pos_tuples = groups_by_pos.values
|
155
|
+
when DaruLite::Index, DaruLite::CategoricalIndex
|
156
|
+
new_index = Array(index).uniq
|
157
|
+
pos_tuples = new_index.map { |idx| [*index.pos(idx)] }
|
158
|
+
else raise
|
159
|
+
end
|
160
|
+
|
161
|
+
[pos_tuples, new_index]
|
162
|
+
end
|
163
|
+
end
|
164
|
+
end
|
165
|
+
end
|