daru_lite 0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (149) hide show
  1. checksums.yaml +7 -0
  2. data/.github/ISSUE_TEMPLATE.md +18 -0
  3. data/.github/workflows/ci.yml +33 -0
  4. data/.gitignore +10 -0
  5. data/.rspec +2 -0
  6. data/.rubocop.yml +27 -0
  7. data/.rubocop_todo.yml +137 -0
  8. data/CONTRIBUTING.md +47 -0
  9. data/Gemfile +2 -0
  10. data/History.md +4 -0
  11. data/LICENSE +24 -0
  12. data/README.md +218 -0
  13. data/Rakefile +69 -0
  14. data/ReleasePolicy.md +20 -0
  15. data/benchmarks/TradeoffData.csv +65 -0
  16. data/benchmarks/csv_reading.rb +22 -0
  17. data/benchmarks/dataframe_creation.rb +39 -0
  18. data/benchmarks/db_loading.rb +34 -0
  19. data/benchmarks/duplicating.rb +45 -0
  20. data/benchmarks/group_by.rb +32 -0
  21. data/benchmarks/joining.rb +52 -0
  22. data/benchmarks/row_access.rb +41 -0
  23. data/benchmarks/row_assign.rb +36 -0
  24. data/benchmarks/sorting.rb +51 -0
  25. data/benchmarks/statistics.rb +28 -0
  26. data/benchmarks/vector_access.rb +31 -0
  27. data/benchmarks/vector_assign.rb +42 -0
  28. data/benchmarks/where_clause.rb +48 -0
  29. data/benchmarks/where_vs_filter.rb +28 -0
  30. data/daru_lite.gemspec +55 -0
  31. data/images/README.md +5 -0
  32. data/images/con0.png +0 -0
  33. data/images/con1.png +0 -0
  34. data/images/init0.png +0 -0
  35. data/images/init1.png +0 -0
  36. data/images/man0.png +0 -0
  37. data/images/man1.png +0 -0
  38. data/images/man2.png +0 -0
  39. data/images/man3.png +0 -0
  40. data/images/man4.png +0 -0
  41. data/images/man5.png +0 -0
  42. data/images/man6.png +0 -0
  43. data/lib/daru_lite/accessors/array_wrapper.rb +109 -0
  44. data/lib/daru_lite/accessors/dataframe_by_row.rb +25 -0
  45. data/lib/daru_lite/accessors/mdarray_wrapper.rb +7 -0
  46. data/lib/daru_lite/category.rb +929 -0
  47. data/lib/daru_lite/configuration.rb +34 -0
  48. data/lib/daru_lite/core/group_by.rb +403 -0
  49. data/lib/daru_lite/core/merge.rb +270 -0
  50. data/lib/daru_lite/core/query.rb +109 -0
  51. data/lib/daru_lite/dataframe.rb +3080 -0
  52. data/lib/daru_lite/date_time/index.rb +569 -0
  53. data/lib/daru_lite/date_time/offsets.rb +397 -0
  54. data/lib/daru_lite/exceptions.rb +2 -0
  55. data/lib/daru_lite/extensions/which_dsl.rb +53 -0
  56. data/lib/daru_lite/formatters/table.rb +52 -0
  57. data/lib/daru_lite/helpers/array.rb +53 -0
  58. data/lib/daru_lite/index/categorical_index.rb +201 -0
  59. data/lib/daru_lite/index/index.rb +374 -0
  60. data/lib/daru_lite/index/multi_index.rb +374 -0
  61. data/lib/daru_lite/io/csv/converters.rb +21 -0
  62. data/lib/daru_lite/io/io.rb +294 -0
  63. data/lib/daru_lite/io/sql_data_source.rb +97 -0
  64. data/lib/daru_lite/iruby/helpers.rb +38 -0
  65. data/lib/daru_lite/iruby/templates/dataframe.html.erb +5 -0
  66. data/lib/daru_lite/iruby/templates/dataframe_mi.html.erb +5 -0
  67. data/lib/daru_lite/iruby/templates/dataframe_mi_tbody.html.erb +35 -0
  68. data/lib/daru_lite/iruby/templates/dataframe_mi_thead.html.erb +21 -0
  69. data/lib/daru_lite/iruby/templates/dataframe_tbody.html.erb +28 -0
  70. data/lib/daru_lite/iruby/templates/dataframe_thead.html.erb +21 -0
  71. data/lib/daru_lite/iruby/templates/multi_index.html.erb +12 -0
  72. data/lib/daru_lite/iruby/templates/vector.html.erb +5 -0
  73. data/lib/daru_lite/iruby/templates/vector_mi.html.erb +5 -0
  74. data/lib/daru_lite/iruby/templates/vector_mi_tbody.html.erb +26 -0
  75. data/lib/daru_lite/iruby/templates/vector_mi_thead.html.erb +8 -0
  76. data/lib/daru_lite/iruby/templates/vector_tbody.html.erb +17 -0
  77. data/lib/daru_lite/iruby/templates/vector_thead.html.erb +8 -0
  78. data/lib/daru_lite/maths/arithmetic/dataframe.rb +91 -0
  79. data/lib/daru_lite/maths/arithmetic/vector.rb +117 -0
  80. data/lib/daru_lite/maths/statistics/dataframe.rb +202 -0
  81. data/lib/daru_lite/maths/statistics/vector.rb +1019 -0
  82. data/lib/daru_lite/monkeys.rb +56 -0
  83. data/lib/daru_lite/vector.rb +1678 -0
  84. data/lib/daru_lite/version.rb +3 -0
  85. data/lib/daru_lite.rb +99 -0
  86. data/profile/_base.rb +23 -0
  87. data/profile/df_to_a.rb +10 -0
  88. data/profile/filter.rb +13 -0
  89. data/profile/joining.rb +13 -0
  90. data/profile/sorting.rb +12 -0
  91. data/profile/vector_each_with_index.rb +9 -0
  92. data/profile/vector_new.rb +9 -0
  93. data/spec/accessors/array_wrapper_spec.rb +3 -0
  94. data/spec/category_spec.rb +1741 -0
  95. data/spec/core/group_by_spec.rb +655 -0
  96. data/spec/core/merge_spec.rb +179 -0
  97. data/spec/core/query_spec.rb +347 -0
  98. data/spec/daru_lite_spec.rb +22 -0
  99. data/spec/dataframe_spec.rb +4330 -0
  100. data/spec/date_time/data_spec.rb +197 -0
  101. data/spec/date_time/date_time_index_helper_spec.rb +72 -0
  102. data/spec/date_time/index_spec.rb +588 -0
  103. data/spec/date_time/offsets_spec.rb +465 -0
  104. data/spec/extensions/which_dsl_spec.rb +38 -0
  105. data/spec/fixtures/bank2.dat +200 -0
  106. data/spec/fixtures/boolean_converter_test.csv +5 -0
  107. data/spec/fixtures/countries.json +7794 -0
  108. data/spec/fixtures/duplicates.csv +32 -0
  109. data/spec/fixtures/eciresults.html +394 -0
  110. data/spec/fixtures/empties.dat +2 -0
  111. data/spec/fixtures/empty_rows_test.csv +17 -0
  112. data/spec/fixtures/macau.html +3691 -0
  113. data/spec/fixtures/macd_data.csv +150 -0
  114. data/spec/fixtures/matrix_test.csv +100 -0
  115. data/spec/fixtures/moneycontrol.html +6812 -0
  116. data/spec/fixtures/music_data.tsv +2501 -0
  117. data/spec/fixtures/repeated_fields.csv +7 -0
  118. data/spec/fixtures/sales-funnel.csv +18 -0
  119. data/spec/fixtures/scientific_notation.csv +4 -0
  120. data/spec/fixtures/string_converter_test.csv +5 -0
  121. data/spec/fixtures/strings.dat +2 -0
  122. data/spec/fixtures/test_xls.xls +0 -0
  123. data/spec/fixtures/test_xls_2.xls +0 -0
  124. data/spec/fixtures/url_test.txt~ +0 -0
  125. data/spec/fixtures/valid_markup.html +62 -0
  126. data/spec/fixtures/wiki_climate.html +1243 -0
  127. data/spec/fixtures/wiki_table_info.html +631 -0
  128. data/spec/formatters/table_formatter_spec.rb +137 -0
  129. data/spec/helpers_spec.rb +8 -0
  130. data/spec/index/categorical_index_spec.rb +170 -0
  131. data/spec/index/index_spec.rb +417 -0
  132. data/spec/index/multi_index_spec.rb +680 -0
  133. data/spec/io/io_spec.rb +373 -0
  134. data/spec/io/sql_data_source_spec.rb +56 -0
  135. data/spec/iruby/dataframe_spec.rb +170 -0
  136. data/spec/iruby/helpers_spec.rb +49 -0
  137. data/spec/iruby/multi_index_spec.rb +37 -0
  138. data/spec/iruby/vector_spec.rb +105 -0
  139. data/spec/maths/arithmetic/dataframe_spec.rb +148 -0
  140. data/spec/maths/arithmetic/vector_spec.rb +165 -0
  141. data/spec/maths/statistics/dataframe_spec.rb +178 -0
  142. data/spec/maths/statistics/vector_spec.rb +756 -0
  143. data/spec/monkeys_spec.rb +42 -0
  144. data/spec/shared/vector_display_spec.rb +213 -0
  145. data/spec/spec_helper.rb +87 -0
  146. data/spec/support/database_helper.rb +30 -0
  147. data/spec/support/matchers.rb +5 -0
  148. data/spec/vector_spec.rb +2293 -0
  149. metadata +571 -0
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 37e2e2dcfa3ec410a0633e843d645485cecddb76690373a383e26740375ba658
4
+ data.tar.gz: 6f3c87500e54eef1753b604676e44224c8ddd64197ef39e7c1a1d3cdd5d3a33b
5
+ SHA512:
6
+ metadata.gz: f7a617ae7bdeb75e3368dfcfd4cb1f6ba96c31940fe2f94e73fc0ddca5e34754d4cc20855e249d166c72c957067b64c5de4220b79003424c2288ad560bfc4e2c
7
+ data.tar.gz: d22eb2e97a63a1a5595b0e70bc6e601861b838ab82742239fd7244d9de5d659afec85d21ed03a8b75c4a7e532b57ae6496147ee74ecb48dfe7f3beb54a231866
@@ -0,0 +1,18 @@
1
+ Heya! We are glad you are going to contribute to Daru by creating an issue, and kindly ask you to
2
+ follow the simple rules:
3
+
4
+ 1. If it is a bug report, please provide **self-contained** Ruby code for reproducing the bug.
5
+ This means if Daru contributors just copy-paste the code from issue into `this-is-bug.rb` and run
6
+ `ruby this-is-bug.rb`, it will be reproduced. If the bug is hard to spot (e.g. it is not some
7
+ `NoMethodError`, but the differences in data structure), please show it with comment in code or
8
+ plain text in the issue.
9
+ 2. If it is a feature request, try to do the following (if possible):
10
+ * show how new feature will work with small code example;
11
+ * explain the use case (if it is not 200% obvious);
12
+ * if you are aware of it, show how it works in pandas and/or R.
13
+ 3. If it is just a question ("how to do this or that" or "why Daru does this or that") feel free to
14
+ write it in any form that is convenient to you, but remember code examples and use cases are always
15
+ welcome.
16
+
17
+ Thanks! And please remove this text when finished with your issue description :)
18
+
@@ -0,0 +1,33 @@
1
+ name: CI
2
+ on: [push]
3
+
4
+ jobs:
5
+ lint:
6
+ runs-on: ubuntu-latest
7
+
8
+ steps:
9
+ - uses: actions/checkout@v3
10
+ - name: Set up Ruby
11
+ uses: ruby/setup-ruby@v1
12
+ with:
13
+ ruby-version: 3.1
14
+ bundler-cache: true # runs 'bundle install' and caches installed gems automatically
15
+ - name: Run linters
16
+ run: |
17
+ bundle exec rubocop
18
+
19
+ test:
20
+ runs-on: ubuntu-latest
21
+ strategy:
22
+ matrix:
23
+ ruby-version: ['3.1']
24
+
25
+ steps:
26
+ - uses: actions/checkout@v3
27
+ - name: Set up Ruby
28
+ uses: ruby/setup-ruby@v1
29
+ with:
30
+ ruby-version: ${{ matrix.ruby-version }}
31
+ bundler-cache: true # runs 'bundle install' and caches installed gems automatically
32
+ - name: Run tests
33
+ run: bundle exec rspec
data/.gitignore ADDED
@@ -0,0 +1,10 @@
1
+ *.gem
2
+ Gemfile.lock
3
+ doc/
4
+ .yardoc/
5
+ .bundle
6
+ vendor/
7
+ profile/out/
8
+ coverage/
9
+ .ruby-version
10
+ .byebug_history
data/.rspec ADDED
@@ -0,0 +1,2 @@
1
+ --require ./spec/spec_helper.rb
2
+ --color
data/.rubocop.yml ADDED
@@ -0,0 +1,27 @@
1
+ inherit_from: .rubocop_todo.yml
2
+
3
+ AllCops:
4
+ Include:
5
+ - 'lib/**/*'
6
+ Exclude:
7
+ - 'daru_lite.gemspec'
8
+ - 'Rakefile'
9
+ - 'Gemfile'
10
+ - 'Guardfile'
11
+ - '**/*.erb'
12
+ - 'spec/*'
13
+ - 'spec/**/*'
14
+ - 'vendor/**/*'
15
+ - 'benchmarks/*'
16
+ - 'profile/*'
17
+ - 'tmp/*'
18
+ DisplayCopNames: true
19
+ TargetRubyVersion: 2.7
20
+ NewCops: enable
21
+
22
+ require:
23
+ - rubocop-performance
24
+ - rubocop-rspec
25
+
26
+ Style/FrozenStringLiteralComment:
27
+ EnforcedStyle: never
data/.rubocop_todo.yml ADDED
@@ -0,0 +1,137 @@
1
+ # This configuration was generated by
2
+ # `rubocop --auto-gen-config`
3
+ # on 2022-08-16 13:20:50 UTC using RuboCop version 1.35.0.
4
+ # The point is for the user to remove these configuration records
5
+ # one by one as the offenses are removed from the code base.
6
+ # Note that changes in the inspected code, or installation of new
7
+ # versions of RuboCop, may require this file to be generated again.
8
+
9
+ # Offense count: 1
10
+ # Configuration parameters: AllowComments.
11
+ Lint/EmptyClass:
12
+ Exclude:
13
+ - 'lib/daru_lite/accessors/mdarray_wrapper.rb'
14
+
15
+ # Offense count: 5
16
+ Lint/MissingSuper:
17
+ Exclude:
18
+ - 'lib/daru_lite/date_time/offsets.rb'
19
+ - 'lib/daru_lite/index/categorical_index.rb'
20
+ - 'lib/daru_lite/index/index.rb'
21
+ - 'lib/daru_lite/index/multi_index.rb'
22
+
23
+ # Offense count: 6
24
+ # Configuration parameters: CheckForMethodsWithNoSideEffects.
25
+ Lint/Void:
26
+ Exclude:
27
+ - 'lib/daru_lite/category.rb'
28
+ - 'lib/daru_lite/dataframe.rb'
29
+ - 'lib/daru_lite/vector.rb'
30
+
31
+ # Offense count: 40
32
+ # Configuration parameters: AllowedMethods, AllowedPatterns, IgnoredMethods, CountRepeatedAttributes.
33
+ Metrics/AbcSize:
34
+ Max: 34
35
+
36
+ # Offense count: 3
37
+ # Configuration parameters: CountComments, CountAsOne.
38
+ Metrics/ClassLength:
39
+ Max: 189
40
+
41
+ # Offense count: 6
42
+ # Configuration parameters: AllowedMethods, AllowedPatterns, IgnoredMethods.
43
+ Metrics/CyclomaticComplexity:
44
+ Max: 9
45
+
46
+ # Offense count: 61
47
+ # Configuration parameters: CountComments, CountAsOne, ExcludedMethods, AllowedMethods, AllowedPatterns, IgnoredMethods.
48
+ Metrics/MethodLength:
49
+ Max: 15
50
+
51
+ # Offense count: 2
52
+ # Configuration parameters: CountComments, CountAsOne.
53
+ Metrics/ModuleLength:
54
+ Max: 190
55
+
56
+ # Offense count: 4
57
+ # Configuration parameters: AllowedMethods, AllowedPatterns, IgnoredMethods.
58
+ Metrics/PerceivedComplexity:
59
+ Max: 10
60
+
61
+ # Offense count: 72
62
+ # Configuration parameters: MinNameLength, AllowNamesEndingInNumbers, AllowedNames, ForbiddenNames.
63
+ # AllowedNames: at, by, db, id, in, io, ip, of, on, os, pp, to
64
+ Naming/MethodParameterName:
65
+ Exclude:
66
+ - 'lib/daru_lite/category.rb'
67
+ - 'lib/daru_lite/core/group_by.rb'
68
+ - 'lib/daru_lite/core/merge.rb'
69
+ - 'lib/daru_lite/core/query.rb'
70
+ - 'lib/daru_lite/dataframe.rb'
71
+ - 'lib/daru_lite/date_time/index.rb'
72
+ - 'lib/daru_lite/date_time/offsets.rb'
73
+ - 'lib/daru_lite/extensions/which_dsl.rb'
74
+ - 'lib/daru_lite/io/io.rb'
75
+ - 'lib/daru_lite/maths/statistics/dataframe.rb'
76
+ - 'lib/daru_lite/maths/statistics/vector.rb'
77
+ - 'lib/daru_lite/vector.rb'
78
+
79
+ # Offense count: 5
80
+ # Configuration parameters: NamePrefix, ForbiddenPrefixes, AllowedMethods, MethodDefinitionMacros.
81
+ # NamePrefix: is_, has_, have_
82
+ # ForbiddenPrefixes: is_, has_, have_
83
+ # AllowedMethods: is_a?
84
+ # MethodDefinitionMacros: define_method, define_singleton_method
85
+ Naming/PredicateName:
86
+ Exclude:
87
+ - 'spec/**/*'
88
+ - 'lib/daru_lite/dataframe.rb'
89
+ - 'lib/daru_lite/vector.rb'
90
+
91
+ # Offense count: 5
92
+ Security/MarshalLoad:
93
+ Exclude:
94
+ - 'lib/daru_lite/dataframe.rb'
95
+ - 'lib/daru_lite/date_time/index.rb'
96
+ - 'lib/daru_lite/index/index.rb'
97
+ - 'lib/daru_lite/io/io.rb'
98
+ - 'lib/daru_lite/vector.rb'
99
+
100
+ # Offense count: 2
101
+ Style/ClassVars:
102
+ Exclude:
103
+ - 'lib/daru_lite.rb'
104
+
105
+ # Offense count: 44
106
+ # Configuration parameters: AllowedConstants.
107
+ Style/Documentation:
108
+ Enabled: false
109
+
110
+ # Offense count: 7
111
+ # This cop supports unsafe autocorrection (--autocorrect-all).
112
+ Style/MapToHash:
113
+ Exclude:
114
+ - 'lib/daru_lite/category.rb'
115
+ - 'lib/daru_lite/core/group_by.rb'
116
+ - 'lib/daru_lite/dataframe.rb'
117
+
118
+ # Offense count: 1
119
+ Style/MultilineBlockChain:
120
+ Exclude:
121
+ - 'lib/daru_lite/formatters/table.rb'
122
+
123
+ # Offense count: 9
124
+ # Configuration parameters: AllowedMethods.
125
+ # AllowedMethods: respond_to_missing?
126
+ Style/OptionalBooleanParameter:
127
+ Exclude:
128
+ - 'lib/daru_lite/dataframe.rb'
129
+ - 'lib/daru_lite/maths/statistics/vector.rb'
130
+ - 'lib/daru_lite/vector.rb'
131
+
132
+ # Offense count: 1
133
+ # This cop supports unsafe autocorrection (--autocorrect-all).
134
+ Style/RedundantSelfAssignment:
135
+ Exclude:
136
+ - 'lib/daru_lite/dataframe.rb'
137
+
data/CONTRIBUTING.md ADDED
@@ -0,0 +1,47 @@
1
+ # Contributing guide
2
+
3
+ ## Installing daru development dependencies
4
+
5
+ To install dependencies, execute the following commands:
6
+
7
+ ``` bash
8
+ sudo apt-get update -qq
9
+ sudo apt-get install libmagickwand-dev imagemagick
10
+ bundle install
11
+ ```
12
+
13
+ And run the test suite (should be all green with pending tests):
14
+
15
+ `bundle exec rspec`
16
+
17
+ While preparing your pull requests, don't forget to check your code with Rubocop:
18
+
19
+ `bundle exec rubocop`
20
+
21
+ [Optional] Install all Ruby versions which Daru currently supports with `rake spec setup`.
22
+
23
+
24
+ ## Basic Development Flow
25
+
26
+ 1. Create a new branch with `git checkout -b <branch_name>`.
27
+ 2. Make your changes. Write tests covering every case how your feature will be used. If creating new files for tests, refer to the 'Testing' section [below](#Testing).
28
+ 3. Try out these changes with `rake pry`.
29
+ 4. Run the test suite with `rake spec`. (Alternatively, you can use `guard` as described [here](https://github.com/SciRuby/daru/blob/master/CONTRIBUTING.md#testing).) Also check your code against the Rubocop coding style guidelines with `rake cop`.
30
+ 5. Commit the changes with `git commit -am "briefly describe what you did"` and submit pull request.
31
+
32
+ [Optional] You can run rspec for all Ruby versions at once with `rake spec run all`. But remember to first have all Ruby versions installed with `rake spec setup`.
33
+
34
+
35
+ ## Testing
36
+
37
+ Daru has automatic testing with Guard. Just execute the following command before you start editing a file, and any change you make will trigger the appropriate tests:
38
+
39
+ ```
40
+ guard
41
+ ```
42
+
43
+ **NOTE**: Please make sure that you place the test for your file at the same level and with the same intermediate directories. For example, if the code file lies in `lib/xyz/abc.rb` then its corresponding test should lie in `spec/xyz/abc_spec.rb`. This is to ensure correct working of Guard.
44
+
45
+ ## Daru internals
46
+
47
+ To get an overview of certain internals of daru and their implementation, go over [this blog post](http://v0dro.github.io/blog/2015/08/16/elaboration-on-certain-internals-of-daru/).
data/Gemfile ADDED
@@ -0,0 +1,2 @@
1
+ source 'https://rubygems.org'
2
+ gemspec
data/History.md ADDED
@@ -0,0 +1,4 @@
1
+ # 0.0.1
2
+ * Forked from Daru
3
+ * Fixed incompatibilities with Ruby 3.1
4
+ * Removed plotting features
data/LICENSE ADDED
@@ -0,0 +1,24 @@
1
+ Copyright (c) 2014, Sameer Deshmukh
2
+ All rights reserved.
3
+
4
+ Redistribution and use in source and binary forms, with or without
5
+ modification, are permitted provided that the following conditions are met:
6
+
7
+ * Redistributions of source code must retain the above copyright notice, this
8
+ list of conditions and the following disclaimer.
9
+
10
+ * Redistributions in binary form must reproduce the above copyright notice,
11
+ this list of conditions and the following disclaimer in the documentation
12
+ and/or other materials provided with the distribution.
13
+
14
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
15
+ AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16
+ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
17
+ DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
18
+ FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19
+ DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
20
+ SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
21
+ CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
22
+ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24
+
data/README.md ADDED
@@ -0,0 +1,218 @@
1
+ # daru - Data Analysis in RUby
2
+
3
+ [![Gem Version](https://badge.fury.io/rb/daru.svg)](http://badge.fury.io/rb/daru)
4
+ [![Build Status](https://travis-ci.org/SciRuby/daru.svg?branch=master)](https://travis-ci.org/SciRuby/daru)
5
+ [![Gitter](https://badges.gitter.im/v0dro/daru.svg)](https://gitter.im/v0dro/daru?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge)
6
+ [![Open Source Helpers](https://www.codetriage.com/sciruby/daru/badges/users.svg)](https://www.codetriage.com/sciruby/daru)
7
+
8
+ ## Introduction
9
+
10
+ daru (Data Analysis in RUby) is a library for storage, analysis, manipulation and visualization of data in Ruby.
11
+
12
+ daru makes it easy and intuitive to process data predominantly through 2 data structures:
13
+ `DaruLite::DataFrame` and `DaruLite::Vector`. Written in pure Ruby, it works with all Ruby implementations.
14
+ Tested with MRI 2.5.1 and 2.7.1.
15
+
16
+ ## daru plugin gems
17
+
18
+ - **[daru-view](https://github.com/SciRuby/daru-view)**
19
+
20
+ daru-view is for easy and interactive plotting in web application & IRuby
21
+ notebook. It can work in any Ruby web application frameworks like Rails, Sinatra, Nanoc and hopefully in others too.
22
+
23
+ Articles/Blogs, that summarize powerful features of daru-view:
24
+
25
+ * [GSoC 2017 daru-view](http://sciruby.com/blog/2017/09/01/gsoc-2017-data-visualization-using-daru-view/)
26
+ * [GSoC 2018 Progress Report](https://github.com/SciRuby/daru-view/wiki/GSoC-2018---Progress-Report)
27
+ * [HighCharts Official blog post regarding daru-view](https://www.highcharts.com/blog/post/i-am-ruby-developer-how-can-i-use-highcharts/)
28
+
29
+ - **[daru-io](https://github.com/SciRuby/daru-io)**
30
+
31
+ This gem extends support for many Import and Export methods of `DaruLite::DataFrame`. This gem is intended to help Rubyists who are into Data Analysis or Web Development, by serving as a general purpose conversion library that takes input in one format (say, JSON) and converts it to another format (say, Avro) while also making it incredibly easy to get started on analyzing data with daru. One can read more in [SciRuby/blog/daru-io](http://sciruby.com/blog/2017/08/29/gsoc-2017-support-to-import-export-of-more-formats/).
32
+
33
+
34
+ ## Features
35
+
36
+ * Data structures:
37
+ - Vector - A basic 1-D vector.
38
+ - DataFrame - A 2-D spreadsheet-like structure for manipulating and storing data sets. This is daru's primary data structure.
39
+ * Compatible with [IRuby notebook](https://github.com/SciRuby/iruby), [statsample](https://github.com/SciRuby/statsample), [statsample-glm](https://github.com/SciRuby/statsample-glm) and [statsample-timeseries](https://github.com/SciRuby/statsample-timeseries).
40
+ * Support for time series.
41
+ * Singly and hierarchically indexed data structures.
42
+ * Flexible and intuitive API for manipulation and analysis of data.
43
+ * Easy plotting, statistics and arithmetic.
44
+ * Plentiful iterators.
45
+ * Easy splitting, aggregation and grouping of data.
46
+ * Quickly reducing data with pivot tables for quick data summary.
47
+ * Import and export data from and to Excel, CSV, SQL Databases, ActiveRecord and plain text files.
48
+
49
+ ## Installation
50
+
51
+ ```console
52
+ $ gem install daru_lite
53
+ ```
54
+
55
+ ## Notebooks
56
+
57
+ #### Notebooks on most use cases
58
+
59
+ * [Overview of most daru functions](http://nbviewer.ipython.org/github/SciRuby/sciruby-notebooks/blob/master/Data%20Analysis/Daru%20Demo.ipynb)
60
+ * [Basic Creation of Vectors and DataFrame](http://nbviewer.ipython.org/github/SciRuby/sciruby-notebooks/blob/master/Data%20Analysis/Creation%20of%20Vector%20and%20DataFrame.ipynb)
61
+ * [Detailed Usage of DaruLite::Vector](http://nbviewer.ipython.org/github/SciRuby/sciruby-notebooks/blob/master/Data%20Analysis/Usage%20of%20Vector.ipynb)
62
+ * [Detailed Usage of DaruLite::DataFrame](http://nbviewer.ipython.org/github/SciRuby/sciruby-notebooks/blob/master/Data%20Analysis/Usage%20of%20DataFrame.ipynb)
63
+ * [Searching and combining data in daru](http://nbviewer.ipython.org/github/SciRuby/sciruby-notebooks/blob/master/Data%20Analysis/Searching%20and%20Combining%20Data.ipynb)
64
+ * [Grouping, Splitting and Pivoting Data](http://nbviewer.ipython.org/github/SciRuby/sciruby-notebooks/blob/master/Data%20Analysis/Grouping%2C%20Splitting%20and%20Pivoting.ipynb)
65
+ * [Usage of Categorical Data](http://nbviewer.jupyter.org/github/SciRuby/sciruby-notebooks/blob/master/Data%20Analysis/Categorical%20Data/Categorical%20Data.ipynb)
66
+
67
+ #### Visualization
68
+ * [Visualizing Data With DaruLite::DataFrame](http://nbviewer.ipython.org/github/SciRuby/sciruby-notebooks/blob/master/Visualization/Visualizing%20data%20with%20daru%20DataFrame.ipynb)
69
+ * [Plotting using GnuplotRB](http://nbviewer.jupyter.org/github/SciRuby/sciruby-notebooks/blob/master/Data%20Analysis/Plotting/Gnuplotrb.ipynb)
70
+ * [Vector plotting with Gruff](http://nbviewer.jupyter.org/github/SciRuby/sciruby-notebooks/blob/master/Data%20Analysis/Plotting/Gruff%20Vector.ipynb)
71
+ * [DataFrame plotting with Gruff](http://nbviewer.jupyter.org/github/SciRuby/sciruby-notebooks/blob/master/Data%20Analysis/Plotting/Gruff%20DataFrame.ipynb)
72
+
73
+ #### Notebooks on Time series
74
+
75
+ * [Basic Time Series](http://nbviewer.ipython.org/github/SciRuby/sciruby-notebooks/blob/master/Data%20Analysis/Basic%20Time%20Series.ipynb)
76
+ * [Time Series Analysis and Plotting](http://nbviewer.ipython.org/github/SciRuby/sciruby-notebooks/blob/master/Data%20Analysis/Time%20Series%20Functions.ipynb)
77
+
78
+ #### Notebooks on Indexing
79
+ * [Indexing in Vector](http://nbviewer.jupyter.org/github/SciRuby/sciruby-notebooks/blob/master/Data%20Analysis/Categorical%20Data/Indexing%20in%20Vector.ipynb)
80
+ * [Indexing in DataFrame](http://nbviewer.jupyter.org/github/SciRuby/sciruby-notebooks/blob/master/Data%20Analysis/Categorical%20Data/Indexing%20in%20DataFrame.ipynb)
81
+
82
+ ### Case Studies
83
+
84
+ * [Logistic Regression Analysis with daru and statsample-glm](http://nbviewer.ipython.org/github/SciRuby/sciruby-notebooks/blob/master/Data%20Analysis/Logistic%20Regression%20with%20daru%20and%20statsample-glm.ipynb)
85
+ * [Finding and Plotting most heard artists from a Last.fm dataset](http://nbviewer.ipython.org/github/SciRuby/sciruby-notebooks/blob/master/Data%20Analysis/Finding%20and%20plotting%20the%20most%20heard%20artists%20on%20last%20fm.ipynb)
86
+ * [Analyzing baby names with daru](http://nbviewer.ipython.org/github/SciRuby/sciruby-notebooks/blob/master/Data%20Analysis/Analyzing%20baby%20names/Use%20Case%20-%20Daru%20for%20analyzing%20baby%20names%20data.ipynb)
87
+ * [Example usage of Categorical Data](http://nbviewer.jupyter.org/github/SciRuby/sciruby-notebooks/blob/master/Data%20Analysis/Categorical%20Data/examples/%5BExample%5D%20Categorical%20Data.ipynb)
88
+ * [Example usage of Categorical Index](http://nbviewer.jupyter.org/github/SciRuby/sciruby-notebooks/blob/master/Data%20Analysis/Categorical%20Data/examples/%5BExample%5D%20Categorical%20Index.ipynb)
89
+
90
+ ## Blog Posts
91
+
92
+ * [Data Analysis in RUby: Basic data manipulation and plotting](http://v0dro.github.io/blog/2014/11/25/data-analysis-in-ruby-basic-data-manipulation-and-plotting/)
93
+ * [Data Analysis in RUby: Splitting, sorting, aggregating data and data types](http://v0dro.github.io/blog/2015/02/24/data-analysis-in-ruby-part-2/)
94
+ * [Finding and Combining data in daru](http://v0dro.github.io/blog/2015/08/03/finding-and-combining-data-in-daru/)
95
+ * [Introduction to analyzing datasets with daru library](http://gafur.me/2018/02/05/analysing-datasets-with-daru-library.html)
96
+
97
+ ### Time series
98
+
99
+ * [Analysis of Time Series in daru](http://v0dro.github.io/blog/2015/07/31/analysis-of-time-series-in-daru/)
100
+ * [Date Offsets in Daru](http://v0dro.github.io/blog/2015/07/27/date-offsets-in-daru/)
101
+
102
+ ### Categorical Data
103
+
104
+ * [Categorical Index](http://lokeshh.github.io/gsoc2016/blog/2016/06/14/categorical-index/)
105
+ * [Categorical Data](http://lokeshh.github.io/gsoc2016/blog/2016/06/21/categorical-data/)
106
+ * [Visualization with Categorical Data](http://lokeshh.github.io/gsoc2016/blog/2016/07/02/visualization/)
107
+
108
+ ## Basic Usage
109
+
110
+ daru exposes two major data structures: `DataFrame` and `Vector`. The `Vector` is a basic 1-D structure corresponding to a labelled Array, while the `DataFrame` - daru's primary data structure - is a 2-D spreadsheet-like structure for manipulating and storing data sets.
111
+
112
+ Basic DataFrame initialization.
113
+
114
+ ``` ruby
115
+ data_frame = DaruLite::DataFrame.new(
116
+ {
117
+ 'Beer' => ['Kingfisher', 'Snow', 'Bud Light', 'Tiger Beer', 'Budweiser'],
118
+ 'Gallons sold' => [500, 400, 450, 200, 250]
119
+ },
120
+ index: ['India', 'China', 'USA', 'Malaysia', 'Canada']
121
+ )
122
+ data_frame
123
+ ```
124
+ ![init0](images/init0.png)
125
+
126
+
127
+ Load data from CSV files.
128
+ ``` ruby
129
+ df = DaruLite::DataFrame.from_csv('TradeoffData.csv')
130
+ ```
131
+ ![init1](images/init1.png)
132
+
133
+ *Basic Data Manipulation*
134
+
135
+ Selecting rows.
136
+ ``` ruby
137
+ data_frame.row['USA']
138
+ ```
139
+ ![man0](images/man0.png)
140
+
141
+ Selecting columns.
142
+ ``` ruby
143
+ data_frame['Beer']
144
+ ```
145
+ ![man1](images/man1.png)
146
+
147
+ A range of rows.
148
+ ``` ruby
149
+ data_frame.row['India'..'USA']
150
+ ```
151
+ ![man2](images/man2.png)
152
+
153
+ The first 2 rows.
154
+ ``` ruby
155
+ data_frame.first(2)
156
+ ```
157
+ ![man3](images/man3.png)
158
+
159
+ The last 2 rows.
160
+ ``` ruby
161
+ data_frame.last(2)
162
+ ```
163
+ ![man4](images/man4.png)
164
+
165
+ Adding a new column.
166
+ ``` ruby
167
+ data_frame['Gallons produced'] = [550, 500, 600, 210, 240]
168
+ ```
169
+ ![man5](images/man5.png)
170
+
171
+ Creating a new column based on data in other columns.
172
+ ``` ruby
173
+ data_frame['Demand supply gap'] = data_frame['Gallons produced'] - data_frame['Gallons sold']
174
+ ```
175
+ ![man6](images/man6.png)
176
+
177
+ *Condition based selection*
178
+
179
+ Selecting countries based on the number of gallons sold in each. We use a syntax similar to that defined by [Arel](https://github.com/rails/arel), i.e. by using the `where` clause.
180
+ ``` ruby
181
+ data_frame.where(data_frame['Gallons sold'].lt(300))
182
+ ```
183
+ ![con0](images/con0.png)
184
+
185
+ You can pass a combination of boolean operations into the `#where` method and it should work fine:
186
+ ``` ruby
187
+ data_frame.where(
188
+ data_frame['Beer']
189
+ .in(['Snow', 'Kingfisher','Tiger Beer'])
190
+ .and(
191
+ data_frame['Gallons produced'].gt(520).or(data_frame['Gallons produced'].lt(250))
192
+ )
193
+ )
194
+ ```
195
+ ![con1](images/con1.png)
196
+
197
+ *Plotting*
198
+
199
+ daru supports plotting out of the box with [gnuplotrb](https://github.com/SciRuby/gnuplotrb).
200
+
201
+ ## Documentation
202
+
203
+ Docs can be found [here](http://www.rubydoc.info/gems/daru).
204
+
205
+ ## Contributing
206
+
207
+ Pick a feature from the Roadmap or the issue tracker or think of your own and send me a Pull Request!
208
+
209
+ For details see [CONTRIBUTING](https://github.com/SciRuby/daru/blob/master/CONTRIBUTING.md).
210
+
211
+ ## Acknowledgements
212
+
213
+ * Google and the Ruby Science Foundation for the Google Summer of Code 2016 grant for speed enhancements and implementation of support for categorical data. Special thanks to [@lokeshh](https://github.com/lokeshh), [@zverok](https://github.com/zverok) and [@agisga](https://github.com/agisga) for their efforts.
214
+ * Google and the Ruby Science Foundation for the Google Summer of Code 2015 grant for further developing daru and integrating it with other ruby gems.
215
+ * Thank you [last.fm](http://www.last.fm/) for making user data accessible to the public.
216
+
217
+ Copyright (c) 2015, Sameer Deshmukh
218
+ All rights reserved
data/Rakefile ADDED
@@ -0,0 +1,69 @@
1
+ require 'rspec/core/rake_task'
2
+ require 'bundler/gem_tasks'
3
+
4
+ lib_folder = File.expand_path("../lib", __FILE__)
5
+
6
+ RUBIES = ['ruby-2.0.0-p643', 'ruby-2.1.5', 'ruby-2.2.1', 'ruby-2.3.0']
7
+
8
+ def spec_run_all
9
+ RUBIES.each do |ruby_v|
10
+ puts "\n Using #{ruby_v}\n\n"
11
+ command = "$rvm_path/wrappers/#{ruby_v}/rake summary"
12
+ run command
13
+ end
14
+ end
15
+
16
+ task :spec do
17
+ case ARGV[1]
18
+ when 'setup'
19
+ spec_setup
20
+ when 'run'
21
+ spec_run_all if ARGV[2] == 'all'
22
+ when nil
23
+ run 'rspec spec'
24
+ end
25
+ end
26
+
27
+ # Stubs
28
+ task :setup
29
+ task :run
30
+ task :all
31
+
32
+ def spec_setup
33
+ RUBIES.each do |ruby_v|
34
+ puts "Installing #{ruby_v}..."
35
+ run "rvm install #{ruby_v}"
36
+ path = "$rvm_path/wrappers/#{ruby_v}"
37
+ run "#{path}/gem install bundler"
38
+ run "#{path}/bundle install"
39
+ end
40
+ end
41
+
42
+ #task all: [:cop, :run_all]
43
+
44
+ task :summary do
45
+ run 'rspec spec/ -r ./.rspec_formatter.rb -f SimpleFormatter' rescue nil
46
+ end
47
+
48
+ #RSpec::Core::RakeTask.new(:spec)
49
+
50
+ task :default => :spec
51
+
52
+ task :console do |task|
53
+ cmd = [ 'irb', "-r '#{lib_folder}/daru_lite.rb'" ]
54
+ run *cmd
55
+ end
56
+
57
+ task :cop do |task|
58
+ run 'rubocop' rescue nil
59
+ end
60
+
61
+ task :pry do |task|
62
+ cmd = [ 'pry', "-r '#{lib_folder}/daru_lite.rb'" ]
63
+ run *cmd
64
+ end
65
+
66
+ def run *cmd
67
+ sh(cmd.join(" "))
68
+ end
69
+
data/ReleasePolicy.md ADDED
@@ -0,0 +1,20 @@
1
+ # Gem Release Policy
2
+
3
+ Applicable to Daru > 0.1.6
4
+
5
+ ## Versioning
6
+
7
+ Daru follows semantic versioning whereby the version number is always in the form MAJOR.MINOR.PATCH
8
+
9
+ * Patch bump = Bug fixes
10
+ * Minor bump = New features but backwards compatible
11
+ * Major bump = API breaking changes
12
+
13
+ For Major and Minor bumps release candidates should be released around 2 weeks prior to the bump and are indicated by MAJOR.MINOR.0.rc.
14
+
15
+ For more information see the full semantic versioning specification at http://semver.org/.
16
+
17
+ ## Release Timing
18
+
19
+ Patch releases should be done after every fix of a major bug (as tagged in the github issue tracker).
20
+ Major releases should be kept to the minimum.