daru_lite 0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.github/ISSUE_TEMPLATE.md +18 -0
- data/.github/workflows/ci.yml +33 -0
- data/.gitignore +10 -0
- data/.rspec +2 -0
- data/.rubocop.yml +27 -0
- data/.rubocop_todo.yml +137 -0
- data/CONTRIBUTING.md +47 -0
- data/Gemfile +2 -0
- data/History.md +4 -0
- data/LICENSE +24 -0
- data/README.md +218 -0
- data/Rakefile +69 -0
- data/ReleasePolicy.md +20 -0
- data/benchmarks/TradeoffData.csv +65 -0
- data/benchmarks/csv_reading.rb +22 -0
- data/benchmarks/dataframe_creation.rb +39 -0
- data/benchmarks/db_loading.rb +34 -0
- data/benchmarks/duplicating.rb +45 -0
- data/benchmarks/group_by.rb +32 -0
- data/benchmarks/joining.rb +52 -0
- data/benchmarks/row_access.rb +41 -0
- data/benchmarks/row_assign.rb +36 -0
- data/benchmarks/sorting.rb +51 -0
- data/benchmarks/statistics.rb +28 -0
- data/benchmarks/vector_access.rb +31 -0
- data/benchmarks/vector_assign.rb +42 -0
- data/benchmarks/where_clause.rb +48 -0
- data/benchmarks/where_vs_filter.rb +28 -0
- data/daru_lite.gemspec +55 -0
- data/images/README.md +5 -0
- data/images/con0.png +0 -0
- data/images/con1.png +0 -0
- data/images/init0.png +0 -0
- data/images/init1.png +0 -0
- data/images/man0.png +0 -0
- data/images/man1.png +0 -0
- data/images/man2.png +0 -0
- data/images/man3.png +0 -0
- data/images/man4.png +0 -0
- data/images/man5.png +0 -0
- data/images/man6.png +0 -0
- data/lib/daru_lite/accessors/array_wrapper.rb +109 -0
- data/lib/daru_lite/accessors/dataframe_by_row.rb +25 -0
- data/lib/daru_lite/accessors/mdarray_wrapper.rb +7 -0
- data/lib/daru_lite/category.rb +929 -0
- data/lib/daru_lite/configuration.rb +34 -0
- data/lib/daru_lite/core/group_by.rb +403 -0
- data/lib/daru_lite/core/merge.rb +270 -0
- data/lib/daru_lite/core/query.rb +109 -0
- data/lib/daru_lite/dataframe.rb +3080 -0
- data/lib/daru_lite/date_time/index.rb +569 -0
- data/lib/daru_lite/date_time/offsets.rb +397 -0
- data/lib/daru_lite/exceptions.rb +2 -0
- data/lib/daru_lite/extensions/which_dsl.rb +53 -0
- data/lib/daru_lite/formatters/table.rb +52 -0
- data/lib/daru_lite/helpers/array.rb +53 -0
- data/lib/daru_lite/index/categorical_index.rb +201 -0
- data/lib/daru_lite/index/index.rb +374 -0
- data/lib/daru_lite/index/multi_index.rb +374 -0
- data/lib/daru_lite/io/csv/converters.rb +21 -0
- data/lib/daru_lite/io/io.rb +294 -0
- data/lib/daru_lite/io/sql_data_source.rb +97 -0
- data/lib/daru_lite/iruby/helpers.rb +38 -0
- data/lib/daru_lite/iruby/templates/dataframe.html.erb +5 -0
- data/lib/daru_lite/iruby/templates/dataframe_mi.html.erb +5 -0
- data/lib/daru_lite/iruby/templates/dataframe_mi_tbody.html.erb +35 -0
- data/lib/daru_lite/iruby/templates/dataframe_mi_thead.html.erb +21 -0
- data/lib/daru_lite/iruby/templates/dataframe_tbody.html.erb +28 -0
- data/lib/daru_lite/iruby/templates/dataframe_thead.html.erb +21 -0
- data/lib/daru_lite/iruby/templates/multi_index.html.erb +12 -0
- data/lib/daru_lite/iruby/templates/vector.html.erb +5 -0
- data/lib/daru_lite/iruby/templates/vector_mi.html.erb +5 -0
- data/lib/daru_lite/iruby/templates/vector_mi_tbody.html.erb +26 -0
- data/lib/daru_lite/iruby/templates/vector_mi_thead.html.erb +8 -0
- data/lib/daru_lite/iruby/templates/vector_tbody.html.erb +17 -0
- data/lib/daru_lite/iruby/templates/vector_thead.html.erb +8 -0
- data/lib/daru_lite/maths/arithmetic/dataframe.rb +91 -0
- data/lib/daru_lite/maths/arithmetic/vector.rb +117 -0
- data/lib/daru_lite/maths/statistics/dataframe.rb +202 -0
- data/lib/daru_lite/maths/statistics/vector.rb +1019 -0
- data/lib/daru_lite/monkeys.rb +56 -0
- data/lib/daru_lite/vector.rb +1678 -0
- data/lib/daru_lite/version.rb +3 -0
- data/lib/daru_lite.rb +99 -0
- data/profile/_base.rb +23 -0
- data/profile/df_to_a.rb +10 -0
- data/profile/filter.rb +13 -0
- data/profile/joining.rb +13 -0
- data/profile/sorting.rb +12 -0
- data/profile/vector_each_with_index.rb +9 -0
- data/profile/vector_new.rb +9 -0
- data/spec/accessors/array_wrapper_spec.rb +3 -0
- data/spec/category_spec.rb +1741 -0
- data/spec/core/group_by_spec.rb +655 -0
- data/spec/core/merge_spec.rb +179 -0
- data/spec/core/query_spec.rb +347 -0
- data/spec/daru_lite_spec.rb +22 -0
- data/spec/dataframe_spec.rb +4330 -0
- data/spec/date_time/data_spec.rb +197 -0
- data/spec/date_time/date_time_index_helper_spec.rb +72 -0
- data/spec/date_time/index_spec.rb +588 -0
- data/spec/date_time/offsets_spec.rb +465 -0
- data/spec/extensions/which_dsl_spec.rb +38 -0
- data/spec/fixtures/bank2.dat +200 -0
- data/spec/fixtures/boolean_converter_test.csv +5 -0
- data/spec/fixtures/countries.json +7794 -0
- data/spec/fixtures/duplicates.csv +32 -0
- data/spec/fixtures/eciresults.html +394 -0
- data/spec/fixtures/empties.dat +2 -0
- data/spec/fixtures/empty_rows_test.csv +17 -0
- data/spec/fixtures/macau.html +3691 -0
- data/spec/fixtures/macd_data.csv +150 -0
- data/spec/fixtures/matrix_test.csv +100 -0
- data/spec/fixtures/moneycontrol.html +6812 -0
- data/spec/fixtures/music_data.tsv +2501 -0
- data/spec/fixtures/repeated_fields.csv +7 -0
- data/spec/fixtures/sales-funnel.csv +18 -0
- data/spec/fixtures/scientific_notation.csv +4 -0
- data/spec/fixtures/string_converter_test.csv +5 -0
- data/spec/fixtures/strings.dat +2 -0
- data/spec/fixtures/test_xls.xls +0 -0
- data/spec/fixtures/test_xls_2.xls +0 -0
- data/spec/fixtures/url_test.txt~ +0 -0
- data/spec/fixtures/valid_markup.html +62 -0
- data/spec/fixtures/wiki_climate.html +1243 -0
- data/spec/fixtures/wiki_table_info.html +631 -0
- data/spec/formatters/table_formatter_spec.rb +137 -0
- data/spec/helpers_spec.rb +8 -0
- data/spec/index/categorical_index_spec.rb +170 -0
- data/spec/index/index_spec.rb +417 -0
- data/spec/index/multi_index_spec.rb +680 -0
- data/spec/io/io_spec.rb +373 -0
- data/spec/io/sql_data_source_spec.rb +56 -0
- data/spec/iruby/dataframe_spec.rb +170 -0
- data/spec/iruby/helpers_spec.rb +49 -0
- data/spec/iruby/multi_index_spec.rb +37 -0
- data/spec/iruby/vector_spec.rb +105 -0
- data/spec/maths/arithmetic/dataframe_spec.rb +148 -0
- data/spec/maths/arithmetic/vector_spec.rb +165 -0
- data/spec/maths/statistics/dataframe_spec.rb +178 -0
- data/spec/maths/statistics/vector_spec.rb +756 -0
- data/spec/monkeys_spec.rb +42 -0
- data/spec/shared/vector_display_spec.rb +213 -0
- data/spec/spec_helper.rb +87 -0
- data/spec/support/database_helper.rb +30 -0
- data/spec/support/matchers.rb +5 -0
- data/spec/vector_spec.rb +2293 -0
- metadata +571 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 37e2e2dcfa3ec410a0633e843d645485cecddb76690373a383e26740375ba658
|
4
|
+
data.tar.gz: 6f3c87500e54eef1753b604676e44224c8ddd64197ef39e7c1a1d3cdd5d3a33b
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: f7a617ae7bdeb75e3368dfcfd4cb1f6ba96c31940fe2f94e73fc0ddca5e34754d4cc20855e249d166c72c957067b64c5de4220b79003424c2288ad560bfc4e2c
|
7
|
+
data.tar.gz: d22eb2e97a63a1a5595b0e70bc6e601861b838ab82742239fd7244d9de5d659afec85d21ed03a8b75c4a7e532b57ae6496147ee74ecb48dfe7f3beb54a231866
|
@@ -0,0 +1,18 @@
|
|
1
|
+
Heya! We are glad you are going to contribute to Daru by creating an issue, and kindly ask you to
|
2
|
+
follow the simple rules:
|
3
|
+
|
4
|
+
1. If it is a bug report, please provide **self-contained** Ruby code for reproducing the bug.
|
5
|
+
This means if Daru contributors just copy-paste the code from issue into `this-is-bug.rb` and run
|
6
|
+
`ruby this-is-bug.rb`, it will be reproduced. If the bug is hard to spot (e.g. it is not some
|
7
|
+
`NoMethodError`, but the differences in data structure), please show it with comment in code or
|
8
|
+
plain text in the issue.
|
9
|
+
2. If it is a feature request, try to do the following (if possible):
|
10
|
+
* show how new feature will work with small code example;
|
11
|
+
* explain the use case (if it is not 200% obvious);
|
12
|
+
* if you are aware of it, show how it works in pandas and/or R.
|
13
|
+
3. If it is just a question ("how to do this or that" or "why Daru does this or that") feel free to
|
14
|
+
write it in any form that is convenient to you, but remember code examples and use cases are always
|
15
|
+
welcome.
|
16
|
+
|
17
|
+
Thanks! And please remove this text when finished with your issue description :)
|
18
|
+
|
@@ -0,0 +1,33 @@
|
|
1
|
+
name: CI
|
2
|
+
on: [push]
|
3
|
+
|
4
|
+
jobs:
|
5
|
+
lint:
|
6
|
+
runs-on: ubuntu-latest
|
7
|
+
|
8
|
+
steps:
|
9
|
+
- uses: actions/checkout@v3
|
10
|
+
- name: Set up Ruby
|
11
|
+
uses: ruby/setup-ruby@v1
|
12
|
+
with:
|
13
|
+
ruby-version: 3.1
|
14
|
+
bundler-cache: true # runs 'bundle install' and caches installed gems automatically
|
15
|
+
- name: Run linters
|
16
|
+
run: |
|
17
|
+
bundle exec rubocop
|
18
|
+
|
19
|
+
test:
|
20
|
+
runs-on: ubuntu-latest
|
21
|
+
strategy:
|
22
|
+
matrix:
|
23
|
+
ruby-version: ['3.1']
|
24
|
+
|
25
|
+
steps:
|
26
|
+
- uses: actions/checkout@v3
|
27
|
+
- name: Set up Ruby
|
28
|
+
uses: ruby/setup-ruby@v1
|
29
|
+
with:
|
30
|
+
ruby-version: ${{ matrix.ruby-version }}
|
31
|
+
bundler-cache: true # runs 'bundle install' and caches installed gems automatically
|
32
|
+
- name: Run tests
|
33
|
+
run: bundle exec rspec
|
data/.gitignore
ADDED
data/.rspec
ADDED
data/.rubocop.yml
ADDED
@@ -0,0 +1,27 @@
|
|
1
|
+
inherit_from: .rubocop_todo.yml
|
2
|
+
|
3
|
+
AllCops:
|
4
|
+
Include:
|
5
|
+
- 'lib/**/*'
|
6
|
+
Exclude:
|
7
|
+
- 'daru_lite.gemspec'
|
8
|
+
- 'Rakefile'
|
9
|
+
- 'Gemfile'
|
10
|
+
- 'Guardfile'
|
11
|
+
- '**/*.erb'
|
12
|
+
- 'spec/*'
|
13
|
+
- 'spec/**/*'
|
14
|
+
- 'vendor/**/*'
|
15
|
+
- 'benchmarks/*'
|
16
|
+
- 'profile/*'
|
17
|
+
- 'tmp/*'
|
18
|
+
DisplayCopNames: true
|
19
|
+
TargetRubyVersion: 2.7
|
20
|
+
NewCops: enable
|
21
|
+
|
22
|
+
require:
|
23
|
+
- rubocop-performance
|
24
|
+
- rubocop-rspec
|
25
|
+
|
26
|
+
Style/FrozenStringLiteralComment:
|
27
|
+
EnforcedStyle: never
|
data/.rubocop_todo.yml
ADDED
@@ -0,0 +1,137 @@
|
|
1
|
+
# This configuration was generated by
|
2
|
+
# `rubocop --auto-gen-config`
|
3
|
+
# on 2022-08-16 13:20:50 UTC using RuboCop version 1.35.0.
|
4
|
+
# The point is for the user to remove these configuration records
|
5
|
+
# one by one as the offenses are removed from the code base.
|
6
|
+
# Note that changes in the inspected code, or installation of new
|
7
|
+
# versions of RuboCop, may require this file to be generated again.
|
8
|
+
|
9
|
+
# Offense count: 1
|
10
|
+
# Configuration parameters: AllowComments.
|
11
|
+
Lint/EmptyClass:
|
12
|
+
Exclude:
|
13
|
+
- 'lib/daru_lite/accessors/mdarray_wrapper.rb'
|
14
|
+
|
15
|
+
# Offense count: 5
|
16
|
+
Lint/MissingSuper:
|
17
|
+
Exclude:
|
18
|
+
- 'lib/daru_lite/date_time/offsets.rb'
|
19
|
+
- 'lib/daru_lite/index/categorical_index.rb'
|
20
|
+
- 'lib/daru_lite/index/index.rb'
|
21
|
+
- 'lib/daru_lite/index/multi_index.rb'
|
22
|
+
|
23
|
+
# Offense count: 6
|
24
|
+
# Configuration parameters: CheckForMethodsWithNoSideEffects.
|
25
|
+
Lint/Void:
|
26
|
+
Exclude:
|
27
|
+
- 'lib/daru_lite/category.rb'
|
28
|
+
- 'lib/daru_lite/dataframe.rb'
|
29
|
+
- 'lib/daru_lite/vector.rb'
|
30
|
+
|
31
|
+
# Offense count: 40
|
32
|
+
# Configuration parameters: AllowedMethods, AllowedPatterns, IgnoredMethods, CountRepeatedAttributes.
|
33
|
+
Metrics/AbcSize:
|
34
|
+
Max: 34
|
35
|
+
|
36
|
+
# Offense count: 3
|
37
|
+
# Configuration parameters: CountComments, CountAsOne.
|
38
|
+
Metrics/ClassLength:
|
39
|
+
Max: 189
|
40
|
+
|
41
|
+
# Offense count: 6
|
42
|
+
# Configuration parameters: AllowedMethods, AllowedPatterns, IgnoredMethods.
|
43
|
+
Metrics/CyclomaticComplexity:
|
44
|
+
Max: 9
|
45
|
+
|
46
|
+
# Offense count: 61
|
47
|
+
# Configuration parameters: CountComments, CountAsOne, ExcludedMethods, AllowedMethods, AllowedPatterns, IgnoredMethods.
|
48
|
+
Metrics/MethodLength:
|
49
|
+
Max: 15
|
50
|
+
|
51
|
+
# Offense count: 2
|
52
|
+
# Configuration parameters: CountComments, CountAsOne.
|
53
|
+
Metrics/ModuleLength:
|
54
|
+
Max: 190
|
55
|
+
|
56
|
+
# Offense count: 4
|
57
|
+
# Configuration parameters: AllowedMethods, AllowedPatterns, IgnoredMethods.
|
58
|
+
Metrics/PerceivedComplexity:
|
59
|
+
Max: 10
|
60
|
+
|
61
|
+
# Offense count: 72
|
62
|
+
# Configuration parameters: MinNameLength, AllowNamesEndingInNumbers, AllowedNames, ForbiddenNames.
|
63
|
+
# AllowedNames: at, by, db, id, in, io, ip, of, on, os, pp, to
|
64
|
+
Naming/MethodParameterName:
|
65
|
+
Exclude:
|
66
|
+
- 'lib/daru_lite/category.rb'
|
67
|
+
- 'lib/daru_lite/core/group_by.rb'
|
68
|
+
- 'lib/daru_lite/core/merge.rb'
|
69
|
+
- 'lib/daru_lite/core/query.rb'
|
70
|
+
- 'lib/daru_lite/dataframe.rb'
|
71
|
+
- 'lib/daru_lite/date_time/index.rb'
|
72
|
+
- 'lib/daru_lite/date_time/offsets.rb'
|
73
|
+
- 'lib/daru_lite/extensions/which_dsl.rb'
|
74
|
+
- 'lib/daru_lite/io/io.rb'
|
75
|
+
- 'lib/daru_lite/maths/statistics/dataframe.rb'
|
76
|
+
- 'lib/daru_lite/maths/statistics/vector.rb'
|
77
|
+
- 'lib/daru_lite/vector.rb'
|
78
|
+
|
79
|
+
# Offense count: 5
|
80
|
+
# Configuration parameters: NamePrefix, ForbiddenPrefixes, AllowedMethods, MethodDefinitionMacros.
|
81
|
+
# NamePrefix: is_, has_, have_
|
82
|
+
# ForbiddenPrefixes: is_, has_, have_
|
83
|
+
# AllowedMethods: is_a?
|
84
|
+
# MethodDefinitionMacros: define_method, define_singleton_method
|
85
|
+
Naming/PredicateName:
|
86
|
+
Exclude:
|
87
|
+
- 'spec/**/*'
|
88
|
+
- 'lib/daru_lite/dataframe.rb'
|
89
|
+
- 'lib/daru_lite/vector.rb'
|
90
|
+
|
91
|
+
# Offense count: 5
|
92
|
+
Security/MarshalLoad:
|
93
|
+
Exclude:
|
94
|
+
- 'lib/daru_lite/dataframe.rb'
|
95
|
+
- 'lib/daru_lite/date_time/index.rb'
|
96
|
+
- 'lib/daru_lite/index/index.rb'
|
97
|
+
- 'lib/daru_lite/io/io.rb'
|
98
|
+
- 'lib/daru_lite/vector.rb'
|
99
|
+
|
100
|
+
# Offense count: 2
|
101
|
+
Style/ClassVars:
|
102
|
+
Exclude:
|
103
|
+
- 'lib/daru_lite.rb'
|
104
|
+
|
105
|
+
# Offense count: 44
|
106
|
+
# Configuration parameters: AllowedConstants.
|
107
|
+
Style/Documentation:
|
108
|
+
Enabled: false
|
109
|
+
|
110
|
+
# Offense count: 7
|
111
|
+
# This cop supports unsafe autocorrection (--autocorrect-all).
|
112
|
+
Style/MapToHash:
|
113
|
+
Exclude:
|
114
|
+
- 'lib/daru_lite/category.rb'
|
115
|
+
- 'lib/daru_lite/core/group_by.rb'
|
116
|
+
- 'lib/daru_lite/dataframe.rb'
|
117
|
+
|
118
|
+
# Offense count: 1
|
119
|
+
Style/MultilineBlockChain:
|
120
|
+
Exclude:
|
121
|
+
- 'lib/daru_lite/formatters/table.rb'
|
122
|
+
|
123
|
+
# Offense count: 9
|
124
|
+
# Configuration parameters: AllowedMethods.
|
125
|
+
# AllowedMethods: respond_to_missing?
|
126
|
+
Style/OptionalBooleanParameter:
|
127
|
+
Exclude:
|
128
|
+
- 'lib/daru_lite/dataframe.rb'
|
129
|
+
- 'lib/daru_lite/maths/statistics/vector.rb'
|
130
|
+
- 'lib/daru_lite/vector.rb'
|
131
|
+
|
132
|
+
# Offense count: 1
|
133
|
+
# This cop supports unsafe autocorrection (--autocorrect-all).
|
134
|
+
Style/RedundantSelfAssignment:
|
135
|
+
Exclude:
|
136
|
+
- 'lib/daru_lite/dataframe.rb'
|
137
|
+
|
data/CONTRIBUTING.md
ADDED
@@ -0,0 +1,47 @@
|
|
1
|
+
# Contributing guide
|
2
|
+
|
3
|
+
## Installing daru development dependencies
|
4
|
+
|
5
|
+
To install dependencies, execute the following commands:
|
6
|
+
|
7
|
+
``` bash
|
8
|
+
sudo apt-get update -qq
|
9
|
+
sudo apt-get install libmagickwand-dev imagemagick
|
10
|
+
bundle install
|
11
|
+
```
|
12
|
+
|
13
|
+
And run the test suite (should be all green with pending tests):
|
14
|
+
|
15
|
+
`bundle exec rspec`
|
16
|
+
|
17
|
+
While preparing your pull requests, don't forget to check your code with Rubocop:
|
18
|
+
|
19
|
+
`bundle exec rubocop`
|
20
|
+
|
21
|
+
[Optional] Install all Ruby versions which Daru currently supports with `rake spec setup`.
|
22
|
+
|
23
|
+
|
24
|
+
## Basic Development Flow
|
25
|
+
|
26
|
+
1. Create a new branch with `git checkout -b <branch_name>`.
|
27
|
+
2. Make your changes. Write tests covering every way your feature will be used. If creating new files for tests, refer to the 'Testing' section [below](#Testing).
|
28
|
+
3. Try out these changes with `rake pry`.
|
29
|
+
4. Run the test suite with `rake spec`. (Alternatively, you can use `guard` as described [here](https://github.com/SciRuby/daru/blob/master/CONTRIBUTING.md#testing).) Also check your code against the Rubocop coding style guidelines with `rake cop`.
|
30
|
+
5. Commit the changes with `git commit -am "briefly describe what you did"` and submit pull request.
|
31
|
+
|
32
|
+
[Optional] You can run rspec for all Ruby versions at once with `rake spec run all`. But remember to first have all Ruby versions installed with `rake spec setup`.
|
33
|
+
|
34
|
+
|
35
|
+
## Testing
|
36
|
+
|
37
|
+
Daru has automatic testing with Guard. Just execute the following command before you start editing a file, and any change you make will trigger the appropriate tests:
|
38
|
+
|
39
|
+
```
|
40
|
+
guard
|
41
|
+
```
|
42
|
+
|
43
|
+
**NOTE**: Please make sure that you place the test for your file at the same level and with the same intermediate directories. For example, if a code file lies in `lib/xyz/abc.rb` then its corresponding test should lie in `spec/xyz/abc_spec.rb`. This is to ensure that Guard works correctly.
|
44
|
+
|
45
|
+
## Daru internals
|
46
|
+
|
47
|
+
To get an overview of certain internals of daru and their implementation, go over [this blog post](http://v0dro.github.io/blog/2015/08/16/elaboration-on-certain-internals-of-daru/).
|
data/Gemfile
ADDED
data/History.md
ADDED
data/LICENSE
ADDED
@@ -0,0 +1,24 @@
|
|
1
|
+
Copyright (c) 2014, Sameer Deshmukh
|
2
|
+
All rights reserved.
|
3
|
+
|
4
|
+
Redistribution and use in source and binary forms, with or without
|
5
|
+
modification, are permitted provided that the following conditions are met:
|
6
|
+
|
7
|
+
* Redistributions of source code must retain the above copyright notice, this
|
8
|
+
list of conditions and the following disclaimer.
|
9
|
+
|
10
|
+
* Redistributions in binary form must reproduce the above copyright notice,
|
11
|
+
this list of conditions and the following disclaimer in the documentation
|
12
|
+
and/or other materials provided with the distribution.
|
13
|
+
|
14
|
+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
15
|
+
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
16
|
+
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
17
|
+
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
18
|
+
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
19
|
+
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
20
|
+
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
21
|
+
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
22
|
+
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
23
|
+
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
24
|
+
|
data/README.md
ADDED
@@ -0,0 +1,218 @@
|
|
1
|
+
# daru - Data Analysis in RUby
|
2
|
+
|
3
|
+
[![Gem Version](https://badge.fury.io/rb/daru.svg)](http://badge.fury.io/rb/daru)
|
4
|
+
[![Build Status](https://travis-ci.org/SciRuby/daru.svg?branch=master)](https://travis-ci.org/SciRuby/daru)
|
5
|
+
[![Gitter](https://badges.gitter.im/v0dro/daru.svg)](https://gitter.im/v0dro/daru?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge)
|
6
|
+
[![Open Source Helpers](https://www.codetriage.com/sciruby/daru/badges/users.svg)](https://www.codetriage.com/sciruby/daru)
|
7
|
+
|
8
|
+
## Introduction
|
9
|
+
|
10
|
+
daru (Data Analysis in RUby) is a library for storage, analysis, manipulation and visualization of data in Ruby.
|
11
|
+
|
12
|
+
daru makes it easy and intuitive to process data predominantly through 2 data structures:
|
13
|
+
`DaruLite::DataFrame` and `DaruLite::Vector`. Written in pure Ruby, it works with all Ruby implementations.
|
14
|
+
Tested with MRI 2.5.1 and 2.7.1.
|
15
|
+
|
16
|
+
## daru plugin gems
|
17
|
+
|
18
|
+
- **[daru-view](https://github.com/SciRuby/daru-view)**
|
19
|
+
|
20
|
+
daru-view is for easy and interactive plotting in web application & IRuby
|
21
|
+
notebook. It can work in any Ruby web application frameworks like Rails, Sinatra, Nanoc and hopefully in others too.
|
22
|
+
|
23
|
+
Articles/Blogs, that summarize powerful features of daru-view:
|
24
|
+
|
25
|
+
* [GSoC 2017 daru-view](http://sciruby.com/blog/2017/09/01/gsoc-2017-data-visualization-using-daru-view/)
|
26
|
+
* [GSoC 2018 Progress Report](https://github.com/SciRuby/daru-view/wiki/GSoC-2018---Progress-Report)
|
27
|
+
* [HighCharts Official blog post regarding daru-view](https://www.highcharts.com/blog/post/i-am-ruby-developer-how-can-i-use-highcharts/)
|
28
|
+
|
29
|
+
- **[daru-io](https://github.com/SciRuby/daru-io)**
|
30
|
+
|
31
|
+
This gem extends support for many Import and Export methods of `DaruLite::DataFrame`. This gem is intended to help Rubyists who are into Data Analysis or Web Development, by serving as a general purpose conversion library that takes input in one format (say, JSON) and converts it to another format (say, Avro) while also making it incredibly easy to get started on analyzing data with daru. One can read more in [SciRuby/blog/daru-io](http://sciruby.com/blog/2017/08/29/gsoc-2017-support-to-import-export-of-more-formats/).
|
32
|
+
|
33
|
+
|
34
|
+
## Features
|
35
|
+
|
36
|
+
* Data structures:
|
37
|
+
- Vector - A basic 1-D vector.
|
38
|
+
- DataFrame - A 2-D spreadsheet-like structure for manipulating and storing data sets. This is daru's primary data structure.
|
39
|
+
* Compatible with [IRuby notebook](https://github.com/SciRuby/iruby), [statsample](https://github.com/SciRuby/statsample), [statsample-glm](https://github.com/SciRuby/statsample-glm) and [statsample-timeseries](https://github.com/SciRuby/statsample-timeseries).
|
40
|
+
* Support for time series.
|
41
|
+
* Singly and hierarchically indexed data structures.
|
42
|
+
* Flexible and intuitive API for manipulation and analysis of data.
|
43
|
+
* Easy plotting, statistics and arithmetic.
|
44
|
+
* Plentiful iterators.
|
45
|
+
* Easy splitting, aggregation and grouping of data.
|
46
|
+
* Quickly reducing data with pivot tables for quick data summary.
|
47
|
+
* Import and export data from and to Excel, CSV, SQL Databases, ActiveRecord and plain text files.
|
48
|
+
|
49
|
+
## Installation
|
50
|
+
|
51
|
+
```console
|
52
|
+
$ gem install daru_lite
|
53
|
+
```
|
54
|
+
|
55
|
+
## Notebooks
|
56
|
+
|
57
|
+
#### Notebooks on most use cases
|
58
|
+
|
59
|
+
* [Overview of most daru functions](http://nbviewer.ipython.org/github/SciRuby/sciruby-notebooks/blob/master/Data%20Analysis/Daru%20Demo.ipynb)
|
60
|
+
* [Basic Creation of Vectors and DataFrame](http://nbviewer.ipython.org/github/SciRuby/sciruby-notebooks/blob/master/Data%20Analysis/Creation%20of%20Vector%20and%20DataFrame.ipynb)
|
61
|
+
* [Detailed Usage of DaruLite::Vector](http://nbviewer.ipython.org/github/SciRuby/sciruby-notebooks/blob/master/Data%20Analysis/Usage%20of%20Vector.ipynb)
|
62
|
+
* [Detailed Usage of DaruLite::DataFrame](http://nbviewer.ipython.org/github/SciRuby/sciruby-notebooks/blob/master/Data%20Analysis/Usage%20of%20DataFrame.ipynb)
|
63
|
+
* [Searching and combining data in daru](http://nbviewer.ipython.org/github/SciRuby/sciruby-notebooks/blob/master/Data%20Analysis/Searching%20and%20Combining%20Data.ipynb)
|
64
|
+
* [Grouping, Splitting and Pivoting Data](http://nbviewer.ipython.org/github/SciRuby/sciruby-notebooks/blob/master/Data%20Analysis/Grouping%2C%20Splitting%20and%20Pivoting.ipynb)
|
65
|
+
* [Usage of Categorical Data](http://nbviewer.jupyter.org/github/SciRuby/sciruby-notebooks/blob/master/Data%20Analysis/Categorical%20Data/Categorical%20Data.ipynb)
|
66
|
+
|
67
|
+
#### Visualization
|
68
|
+
* [Visualizing Data With DaruLite::DataFrame](http://nbviewer.ipython.org/github/SciRuby/sciruby-notebooks/blob/master/Visualization/Visualizing%20data%20with%20daru%20DataFrame.ipynb)
|
69
|
+
* [Plotting using GnuplotRB](http://nbviewer.jupyter.org/github/SciRuby/sciruby-notebooks/blob/master/Data%20Analysis/Plotting/Gnuplotrb.ipynb)
|
70
|
+
* [Vector plotting with Gruff](http://nbviewer.jupyter.org/github/SciRuby/sciruby-notebooks/blob/master/Data%20Analysis/Plotting/Gruff%20Vector.ipynb)
|
71
|
+
* [DataFrame plotting with Gruff](http://nbviewer.jupyter.org/github/SciRuby/sciruby-notebooks/blob/master/Data%20Analysis/Plotting/Gruff%20DataFrame.ipynb)
|
72
|
+
|
73
|
+
#### Notebooks on Time series
|
74
|
+
|
75
|
+
* [Basic Time Series](http://nbviewer.ipython.org/github/SciRuby/sciruby-notebooks/blob/master/Data%20Analysis/Basic%20Time%20Series.ipynb)
|
76
|
+
* [Time Series Analysis and Plotting](http://nbviewer.ipython.org/github/SciRuby/sciruby-notebooks/blob/master/Data%20Analysis/Time%20Series%20Functions.ipynb)
|
77
|
+
|
78
|
+
#### Notebooks on Indexing
|
79
|
+
* [Indexing in Vector](http://nbviewer.jupyter.org/github/SciRuby/sciruby-notebooks/blob/master/Data%20Analysis/Categorical%20Data/Indexing%20in%20Vector.ipynb)
|
80
|
+
* [Indexing in DataFrame](http://nbviewer.jupyter.org/github/SciRuby/sciruby-notebooks/blob/master/Data%20Analysis/Categorical%20Data/Indexing%20in%20DataFrame.ipynb)
|
81
|
+
|
82
|
+
### Case Studies
|
83
|
+
|
84
|
+
* [Logistic Regression Analysis with daru and statsample-glm](http://nbviewer.ipython.org/github/SciRuby/sciruby-notebooks/blob/master/Data%20Analysis/Logistic%20Regression%20with%20daru%20and%20statsample-glm.ipynb)
|
85
|
+
* [Finding and Plotting most heard artists from a Last.fm dataset](http://nbviewer.ipython.org/github/SciRuby/sciruby-notebooks/blob/master/Data%20Analysis/Finding%20and%20plotting%20the%20most%20heard%20artists%20on%20last%20fm.ipynb)
|
86
|
+
* [Analyzing baby names with daru](http://nbviewer.ipython.org/github/SciRuby/sciruby-notebooks/blob/master/Data%20Analysis/Analyzing%20baby%20names/Use%20Case%20-%20Daru%20for%20analyzing%20baby%20names%20data.ipynb)
|
87
|
+
* [Example usage of Categorical Data](http://nbviewer.jupyter.org/github/SciRuby/sciruby-notebooks/blob/master/Data%20Analysis/Categorical%20Data/examples/%5BExample%5D%20Categorical%20Data.ipynb)
|
88
|
+
* [Example usage of Categorical Index](http://nbviewer.jupyter.org/github/SciRuby/sciruby-notebooks/blob/master/Data%20Analysis/Categorical%20Data/examples/%5BExample%5D%20Categorical%20Index.ipynb)
|
89
|
+
|
90
|
+
## Blog Posts
|
91
|
+
|
92
|
+
* [Data Analysis in RUby: Basic data manipulation and plotting](http://v0dro.github.io/blog/2014/11/25/data-analysis-in-ruby-basic-data-manipulation-and-plotting/)
|
93
|
+
* [Data Analysis in RUby: Splitting, sorting, aggregating data and data types](http://v0dro.github.io/blog/2015/02/24/data-analysis-in-ruby-part-2/)
|
94
|
+
* [Finding and Combining data in daru](http://v0dro.github.io/blog/2015/08/03/finding-and-combining-data-in-daru/)
|
95
|
+
* [Introduction to analyzing datasets with daru library](http://gafur.me/2018/02/05/analysing-datasets-with-daru-library.html)
|
96
|
+
|
97
|
+
### Time series
|
98
|
+
|
99
|
+
* [Analysis of Time Series in daru](http://v0dro.github.io/blog/2015/07/31/analysis-of-time-series-in-daru/)
|
100
|
+
* [Date Offsets in Daru](http://v0dro.github.io/blog/2015/07/27/date-offsets-in-daru/)
|
101
|
+
|
102
|
+
### Categorical Data
|
103
|
+
|
104
|
+
* [Categorical Index](http://lokeshh.github.io/gsoc2016/blog/2016/06/14/categorical-index/)
|
105
|
+
* [Categorical Data](http://lokeshh.github.io/gsoc2016/blog/2016/06/21/categorical-data/)
|
106
|
+
* [Visualization with Categorical Data](http://lokeshh.github.io/gsoc2016/blog/2016/07/02/visualization/)
|
107
|
+
|
108
|
+
## Basic Usage
|
109
|
+
|
110
|
+
daru exposes two major data structures: `DataFrame` and `Vector`. The `Vector` is a basic 1-D structure corresponding to a labelled Array, while the `DataFrame` - daru's primary data structure - is a 2-D spreadsheet-like structure for manipulating and storing data sets.
|
111
|
+
|
112
|
+
Basic DataFrame initialization.
|
113
|
+
|
114
|
+
``` ruby
|
115
|
+
data_frame = DaruLite::DataFrame.new(
|
116
|
+
{
|
117
|
+
'Beer' => ['Kingfisher', 'Snow', 'Bud Light', 'Tiger Beer', 'Budweiser'],
|
118
|
+
'Gallons sold' => [500, 400, 450, 200, 250]
|
119
|
+
},
|
120
|
+
index: ['India', 'China', 'USA', 'Malaysia', 'Canada']
|
121
|
+
)
|
122
|
+
data_frame
|
123
|
+
```
|
124
|
+
![init0](images/init0.png)
|
125
|
+
|
126
|
+
|
127
|
+
Load data from CSV files.
|
128
|
+
``` ruby
|
129
|
+
df = DaruLite::DataFrame.from_csv('TradeoffData.csv')
|
130
|
+
```
|
131
|
+
![init1](images/init1.png)
|
132
|
+
|
133
|
+
*Basic Data Manipulation*
|
134
|
+
|
135
|
+
Selecting rows.
|
136
|
+
``` ruby
|
137
|
+
data_frame.row['USA']
|
138
|
+
```
|
139
|
+
![man0](images/man0.png)
|
140
|
+
|
141
|
+
Selecting columns.
|
142
|
+
``` ruby
|
143
|
+
data_frame['Beer']
|
144
|
+
```
|
145
|
+
![man1](images/man1.png)
|
146
|
+
|
147
|
+
A range of rows.
|
148
|
+
``` ruby
|
149
|
+
data_frame.row['India'..'USA']
|
150
|
+
```
|
151
|
+
![man2](images/man2.png)
|
152
|
+
|
153
|
+
The first 2 rows.
|
154
|
+
``` ruby
|
155
|
+
data_frame.first(2)
|
156
|
+
```
|
157
|
+
![man3](images/man3.png)
|
158
|
+
|
159
|
+
The last 2 rows.
|
160
|
+
``` ruby
|
161
|
+
data_frame.last(2)
|
162
|
+
```
|
163
|
+
![man4](images/man4.png)
|
164
|
+
|
165
|
+
Adding a new column.
|
166
|
+
``` ruby
|
167
|
+
data_frame['Gallons produced'] = [550, 500, 600, 210, 240]
|
168
|
+
```
|
169
|
+
![man5](images/man5.png)
|
170
|
+
|
171
|
+
Creating a new column based on data in other columns.
|
172
|
+
``` ruby
|
173
|
+
data_frame['Demand supply gap'] = data_frame['Gallons produced'] - data_frame['Gallons sold']
|
174
|
+
```
|
175
|
+
![man6](images/man6.png)
|
176
|
+
|
177
|
+
*Condition based selection*
|
178
|
+
|
179
|
+
Selecting countries based on the number of gallons sold in each. We use a syntax similar to that defined by [Arel](https://github.com/rails/arel), i.e. by using the `where` clause.
|
180
|
+
``` ruby
|
181
|
+
data_frame.where(data_frame['Gallons sold'].lt(300))
|
182
|
+
```
|
183
|
+
![con0](images/con0.png)
|
184
|
+
|
185
|
+
You can pass a combination of boolean operations into the `#where` method and it should work fine:
|
186
|
+
``` ruby
|
187
|
+
data_frame.where(
|
188
|
+
data_frame['Beer']
|
189
|
+
.in(['Snow', 'Kingfisher','Tiger Beer'])
|
190
|
+
.and(
|
191
|
+
data_frame['Gallons produced'].gt(520).or(data_frame['Gallons produced'].lt(250))
|
192
|
+
)
|
193
|
+
)
|
194
|
+
```
|
195
|
+
![con1](images/con1.png)
|
196
|
+
|
197
|
+
*Plotting*
|
198
|
+
|
199
|
+
daru supports plotting out of the box with [gnuplotrb](https://github.com/SciRuby/gnuplotrb).
|
200
|
+
|
201
|
+
## Documentation
|
202
|
+
|
203
|
+
Docs can be found [here](http://www.rubydoc.info/gems/daru).
|
204
|
+
|
205
|
+
## Contributing
|
206
|
+
|
207
|
+
Pick a feature from the Roadmap or the issue tracker or think of your own and send me a Pull Request!
|
208
|
+
|
209
|
+
For details see [CONTRIBUTING](https://github.com/SciRuby/daru/blob/master/CONTRIBUTING.md).
|
210
|
+
|
211
|
+
## Acknowledgements
|
212
|
+
|
213
|
+
* Google and the Ruby Science Foundation for the Google Summer of Code 2016 grant for speed enhancements and implementation of support for categorical data. Special thanks to [@lokeshh](https://github.com/lokeshh), [@zverok](https://github.com/zverok) and [@agisga](https://github.com/agisga) for their efforts.
|
214
|
+
* Google and the Ruby Science Foundation for the Google Summer of Code 2015 grant for further developing daru and integrating it with other ruby gems.
|
215
|
+
* Thank you [last.fm](http://www.last.fm/) for making user data accessible to the public.
|
216
|
+
|
217
|
+
Copyright (c) 2015, Sameer Deshmukh
|
218
|
+
All rights reserved
|
data/Rakefile
ADDED
@@ -0,0 +1,69 @@
|
|
1
|
+
require 'rspec/core/rake_task'
|
2
|
+
require 'bundler/gem_tasks'
|
3
|
+
|
4
|
+
# Absolute path to the lib/ directory that sits next to this Rakefile;
# interpolated into the `-r` arguments of the :console and :pry tasks.
lib_folder = File.expand_path("../lib", __FILE__)
|
5
|
+
|
6
|
+
# Interpreter names (as passed to `rvm install`) that `spec_setup` installs and
# `spec_run_all` runs the suite against. Frozen so the shared list cannot be
# mutated by accident.
# NOTE(review): these releases are older than the Ruby 2.7 target in
# .rubocop.yml and the 3.1 used by CI - confirm the list is still intended.
RUBIES = %w[ruby-2.0.0-p643 ruby-2.1.5 ruby-2.2.1 ruby-2.3.0].freeze
|
7
|
+
|
8
|
+
# Runs the `summary` rake task once for every interpreter listed in RUBIES,
# calling the rake executable found in that interpreter's RVM wrappers
# directory. `$rvm_path` is literal text in the Ruby string; presumably the
# shell expands it when the command is executed.
def spec_run_all
  RUBIES.each do |version|
    puts "\n Using #{version}\n\n"
    run "$rvm_path/wrappers/#{version}/rake summary"
  end
end
|
15
|
+
|
16
|
+
# `rake spec` entry point. Dispatches on the second and third command-line
# words (ARGV[1], ARGV[2]), assuming `spec` itself is the first:
#   rake spec          -> run 'rspec spec'
#   rake spec setup    -> spec_setup
#   rake spec run all  -> spec_run_all
# Any other combination does nothing here.
task :spec do
  subcommand = ARGV[1]
  target = ARGV[2]

  if subcommand == 'setup'
    spec_setup
  elsif subcommand == 'run'
    spec_run_all if target == 'all'
  elsif subcommand.nil?
    run 'rspec spec'
  end
end
|
26
|
+
|
27
|
+
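# Stubs: no-op tasks named after the extra words in `rake spec setup` and
# `rake spec run all` (which the :spec task reads from ARGV), so that Rake
# does not reject those words as unknown tasks.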
|
28
|
+
task :setup
|
29
|
+
task :run
|
30
|
+
task :all
|
31
|
+
|
32
|
+
# Installs every interpreter listed in RUBIES through RVM, then uses the
# `gem` and `bundle` executables from that interpreter's RVM wrappers
# directory to install Bundler and the project's bundle.
def spec_setup
  RUBIES.each do |version|
    wrappers = "$rvm_path/wrappers/#{version}"

    puts "Installing #{version}..."
    run "rvm install #{version}"
    run "#{wrappers}/gem install bundler"
    run "#{wrappers}/bundle install"
  end
end
|
41
|
+
|
42
|
+
#task all: [:cop, :run_all]
|
43
|
+
|
44
|
+
# Runs the spec suite with the SimpleFormatter loaded from
# ./.rspec_formatter.rb. A failing command is deliberately non-fatal
# (presumably so a multi-Ruby `spec_run_all` is not cut short by one failing
# suite), but the error is now reported on stderr instead of being silently
# discarded by a `rescue nil` modifier.
task :summary do
  begin
    run 'rspec spec/ -r ./.rspec_formatter.rb -f SimpleFormatter'
  rescue StandardError => e
    warn "rake summary: #{e.message}"
  end
end
|
47
|
+
|
48
|
+
#RSpec::Core::RakeTask.new(:spec)
|
49
|
+
|
50
|
+
# Running bare `rake` executes the :spec task.
task default: :spec
|
51
|
+
|
52
|
+
# Opens an IRB session with lib/daru_lite.rb preloaded through `-r`.
# The unused block parameter is dropped and the arguments are passed in
# parentheses, avoiding Ruby's ambiguous-splat warning for `run *cmd`.
task :console do
  run('irb', "-r '#{lib_folder}/daru_lite.rb'")
end
|
56
|
+
|
57
|
+
# Runs RuboCop. A failing command stays non-fatal for the rake task, as
# before, but the error is reported on stderr rather than being swallowed
# by a `rescue nil` modifier.
task :cop do
  begin
    run 'rubocop'
  rescue StandardError => e
    warn "rake cop: #{e.message}"
  end
end
|
60
|
+
|
61
|
+
# Opens a Pry session with lib/daru_lite.rb preloaded through `-r`.
# The unused block parameter is dropped and the arguments are passed in
# parentheses, avoiding Ruby's ambiguous-splat warning for `run *cmd`.
task :pry do
  run('pry', "-r '#{lib_folder}/daru_lite.rb'")
end
|
65
|
+
|
66
|
+
# Joins the given command fragments with single spaces and executes the
# resulting command line via `sh`. `sh` is not defined in this file; in a
# Rakefile it is normally Rake's shell helper.
#
# @param cmd [Array<String>] fragments of the command line
# @return whatever `sh` returns
def run(*cmd)
  sh(cmd.join(' '))
end
|
69
|
+
|
data/ReleasePolicy.md
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
# Gem Release Policy
|
2
|
+
|
3
|
+
Applicable to Daru > 0.1.6
|
4
|
+
|
5
|
+
## Versioning
|
6
|
+
|
7
|
+
Daru follows semantic versioning whereby the version number is always in the form MAJOR.MINOR.PATCH
|
8
|
+
|
9
|
+
* Patch bump = Bug fixes
|
10
|
+
* Minor bump = New features but backwards compatible
|
11
|
+
* Major bump = API breaking changes
|
12
|
+
|
13
|
+
For Major and Minor bumps release candidates should be released around 2 weeks prior to the bump and are indicated by MAJOR.MINOR.0.rc.
|
14
|
+
|
15
|
+
For more information see the full semantic versioning specification at http://semver.org/.
|
16
|
+
|
17
|
+
## Release Timing
|
18
|
+
|
19
|
+
Patch releases should be done after every fix of a major bug (as tagged in the github issue tracker).
|
20
|
+
Major releases should be kept to the minimum.
|