daru_lite 0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (149) hide show
  1. checksums.yaml +7 -0
  2. data/.github/ISSUE_TEMPLATE.md +18 -0
  3. data/.github/workflows/ci.yml +33 -0
  4. data/.gitignore +10 -0
  5. data/.rspec +2 -0
  6. data/.rubocop.yml +27 -0
  7. data/.rubocop_todo.yml +137 -0
  8. data/CONTRIBUTING.md +47 -0
  9. data/Gemfile +2 -0
  10. data/History.md +4 -0
  11. data/LICENSE +24 -0
  12. data/README.md +218 -0
  13. data/Rakefile +69 -0
  14. data/ReleasePolicy.md +20 -0
  15. data/benchmarks/TradeoffData.csv +65 -0
  16. data/benchmarks/csv_reading.rb +22 -0
  17. data/benchmarks/dataframe_creation.rb +39 -0
  18. data/benchmarks/db_loading.rb +34 -0
  19. data/benchmarks/duplicating.rb +45 -0
  20. data/benchmarks/group_by.rb +32 -0
  21. data/benchmarks/joining.rb +52 -0
  22. data/benchmarks/row_access.rb +41 -0
  23. data/benchmarks/row_assign.rb +36 -0
  24. data/benchmarks/sorting.rb +51 -0
  25. data/benchmarks/statistics.rb +28 -0
  26. data/benchmarks/vector_access.rb +31 -0
  27. data/benchmarks/vector_assign.rb +42 -0
  28. data/benchmarks/where_clause.rb +48 -0
  29. data/benchmarks/where_vs_filter.rb +28 -0
  30. data/daru_lite.gemspec +55 -0
  31. data/images/README.md +5 -0
  32. data/images/con0.png +0 -0
  33. data/images/con1.png +0 -0
  34. data/images/init0.png +0 -0
  35. data/images/init1.png +0 -0
  36. data/images/man0.png +0 -0
  37. data/images/man1.png +0 -0
  38. data/images/man2.png +0 -0
  39. data/images/man3.png +0 -0
  40. data/images/man4.png +0 -0
  41. data/images/man5.png +0 -0
  42. data/images/man6.png +0 -0
  43. data/lib/daru_lite/accessors/array_wrapper.rb +109 -0
  44. data/lib/daru_lite/accessors/dataframe_by_row.rb +25 -0
  45. data/lib/daru_lite/accessors/mdarray_wrapper.rb +7 -0
  46. data/lib/daru_lite/category.rb +929 -0
  47. data/lib/daru_lite/configuration.rb +34 -0
  48. data/lib/daru_lite/core/group_by.rb +403 -0
  49. data/lib/daru_lite/core/merge.rb +270 -0
  50. data/lib/daru_lite/core/query.rb +109 -0
  51. data/lib/daru_lite/dataframe.rb +3080 -0
  52. data/lib/daru_lite/date_time/index.rb +569 -0
  53. data/lib/daru_lite/date_time/offsets.rb +397 -0
  54. data/lib/daru_lite/exceptions.rb +2 -0
  55. data/lib/daru_lite/extensions/which_dsl.rb +53 -0
  56. data/lib/daru_lite/formatters/table.rb +52 -0
  57. data/lib/daru_lite/helpers/array.rb +53 -0
  58. data/lib/daru_lite/index/categorical_index.rb +201 -0
  59. data/lib/daru_lite/index/index.rb +374 -0
  60. data/lib/daru_lite/index/multi_index.rb +374 -0
  61. data/lib/daru_lite/io/csv/converters.rb +21 -0
  62. data/lib/daru_lite/io/io.rb +294 -0
  63. data/lib/daru_lite/io/sql_data_source.rb +97 -0
  64. data/lib/daru_lite/iruby/helpers.rb +38 -0
  65. data/lib/daru_lite/iruby/templates/dataframe.html.erb +5 -0
  66. data/lib/daru_lite/iruby/templates/dataframe_mi.html.erb +5 -0
  67. data/lib/daru_lite/iruby/templates/dataframe_mi_tbody.html.erb +35 -0
  68. data/lib/daru_lite/iruby/templates/dataframe_mi_thead.html.erb +21 -0
  69. data/lib/daru_lite/iruby/templates/dataframe_tbody.html.erb +28 -0
  70. data/lib/daru_lite/iruby/templates/dataframe_thead.html.erb +21 -0
  71. data/lib/daru_lite/iruby/templates/multi_index.html.erb +12 -0
  72. data/lib/daru_lite/iruby/templates/vector.html.erb +5 -0
  73. data/lib/daru_lite/iruby/templates/vector_mi.html.erb +5 -0
  74. data/lib/daru_lite/iruby/templates/vector_mi_tbody.html.erb +26 -0
  75. data/lib/daru_lite/iruby/templates/vector_mi_thead.html.erb +8 -0
  76. data/lib/daru_lite/iruby/templates/vector_tbody.html.erb +17 -0
  77. data/lib/daru_lite/iruby/templates/vector_thead.html.erb +8 -0
  78. data/lib/daru_lite/maths/arithmetic/dataframe.rb +91 -0
  79. data/lib/daru_lite/maths/arithmetic/vector.rb +117 -0
  80. data/lib/daru_lite/maths/statistics/dataframe.rb +202 -0
  81. data/lib/daru_lite/maths/statistics/vector.rb +1019 -0
  82. data/lib/daru_lite/monkeys.rb +56 -0
  83. data/lib/daru_lite/vector.rb +1678 -0
  84. data/lib/daru_lite/version.rb +3 -0
  85. data/lib/daru_lite.rb +99 -0
  86. data/profile/_base.rb +23 -0
  87. data/profile/df_to_a.rb +10 -0
  88. data/profile/filter.rb +13 -0
  89. data/profile/joining.rb +13 -0
  90. data/profile/sorting.rb +12 -0
  91. data/profile/vector_each_with_index.rb +9 -0
  92. data/profile/vector_new.rb +9 -0
  93. data/spec/accessors/array_wrapper_spec.rb +3 -0
  94. data/spec/category_spec.rb +1741 -0
  95. data/spec/core/group_by_spec.rb +655 -0
  96. data/spec/core/merge_spec.rb +179 -0
  97. data/spec/core/query_spec.rb +347 -0
  98. data/spec/daru_lite_spec.rb +22 -0
  99. data/spec/dataframe_spec.rb +4330 -0
  100. data/spec/date_time/data_spec.rb +197 -0
  101. data/spec/date_time/date_time_index_helper_spec.rb +72 -0
  102. data/spec/date_time/index_spec.rb +588 -0
  103. data/spec/date_time/offsets_spec.rb +465 -0
  104. data/spec/extensions/which_dsl_spec.rb +38 -0
  105. data/spec/fixtures/bank2.dat +200 -0
  106. data/spec/fixtures/boolean_converter_test.csv +5 -0
  107. data/spec/fixtures/countries.json +7794 -0
  108. data/spec/fixtures/duplicates.csv +32 -0
  109. data/spec/fixtures/eciresults.html +394 -0
  110. data/spec/fixtures/empties.dat +2 -0
  111. data/spec/fixtures/empty_rows_test.csv +17 -0
  112. data/spec/fixtures/macau.html +3691 -0
  113. data/spec/fixtures/macd_data.csv +150 -0
  114. data/spec/fixtures/matrix_test.csv +100 -0
  115. data/spec/fixtures/moneycontrol.html +6812 -0
  116. data/spec/fixtures/music_data.tsv +2501 -0
  117. data/spec/fixtures/repeated_fields.csv +7 -0
  118. data/spec/fixtures/sales-funnel.csv +18 -0
  119. data/spec/fixtures/scientific_notation.csv +4 -0
  120. data/spec/fixtures/string_converter_test.csv +5 -0
  121. data/spec/fixtures/strings.dat +2 -0
  122. data/spec/fixtures/test_xls.xls +0 -0
  123. data/spec/fixtures/test_xls_2.xls +0 -0
  124. data/spec/fixtures/url_test.txt~ +0 -0
  125. data/spec/fixtures/valid_markup.html +62 -0
  126. data/spec/fixtures/wiki_climate.html +1243 -0
  127. data/spec/fixtures/wiki_table_info.html +631 -0
  128. data/spec/formatters/table_formatter_spec.rb +137 -0
  129. data/spec/helpers_spec.rb +8 -0
  130. data/spec/index/categorical_index_spec.rb +170 -0
  131. data/spec/index/index_spec.rb +417 -0
  132. data/spec/index/multi_index_spec.rb +680 -0
  133. data/spec/io/io_spec.rb +373 -0
  134. data/spec/io/sql_data_source_spec.rb +56 -0
  135. data/spec/iruby/dataframe_spec.rb +170 -0
  136. data/spec/iruby/helpers_spec.rb +49 -0
  137. data/spec/iruby/multi_index_spec.rb +37 -0
  138. data/spec/iruby/vector_spec.rb +105 -0
  139. data/spec/maths/arithmetic/dataframe_spec.rb +148 -0
  140. data/spec/maths/arithmetic/vector_spec.rb +165 -0
  141. data/spec/maths/statistics/dataframe_spec.rb +178 -0
  142. data/spec/maths/statistics/vector_spec.rb +756 -0
  143. data/spec/monkeys_spec.rb +42 -0
  144. data/spec/shared/vector_display_spec.rb +213 -0
  145. data/spec/spec_helper.rb +87 -0
  146. data/spec/support/database_helper.rb +30 -0
  147. data/spec/support/matchers.rb +5 -0
  148. data/spec/vector_spec.rb +2293 -0
  149. metadata +571 -0
data/lib/daru_lite/version.rb ADDED
@@ -0,0 +1,3 @@
module DaruLite
  # Version string of the gem. Unary minus on a String returns a frozen
  # string, so the constant cannot be mutated in place.
  VERSION = -'0.1'
end
data/lib/daru_lite.rb ADDED
@@ -0,0 +1,99 @@
# :nocov:
# Tells whether the running interpreter is JRuby by inspecting RUBY_ENGINE.
#
# @return [Boolean] true only when RUBY_ENGINE is exactly 'jruby'
def jruby?
  RUBY_ENGINE.eql?('jruby')
end
# :nocov:
6
+
module DaruLite
  # Three-letter upper-case day abbreviations mapped to day numbers (SUN is 0).
  DAYS_OF_WEEK = %w[SUN MON TUE WED THU FRI SAT].each_with_index.to_h.freeze

  # Month number (1-12) mapped to a day count; February is listed as 28.
  MONTH_DAYS = (1..12).zip([31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]).to_h.freeze

  # The two missing-value markers: nil and Float::NAN.
  MISSING_VALUES = [nil, Float::NAN].freeze

  @lazy_update = false

  SPLIT_TOKEN = ','.freeze

  @error_stream = $stderr

  class << self
    # lazy_update sets whether Vector metadata is updated immediately or lazily.
    # When updating lazily, call the #update method every time values are set
    # or removed in order to refresh metadata such as the positions of missing
    # values.
    #
    # error_stream is the object that .error writes to (initially $stderr).
    attr_accessor :lazy_update, :error_stream

    # Defines a singleton predicate `has_<library>?` (dashes in the library
    # name become underscores). The first call to the predicate tries to
    # require the library and stores the outcome in a class variable; later
    # calls return the stored value without requiring again.
    #
    # @param library [#to_s] name passed to `require`
    def create_has_library(library)
      suffix = library.to_s.tr('-', '_')
      cache_name = "@@#{suffix}"

      define_singleton_method(:"has_#{suffix}?") do
        unless class_variable_defined?(cache_name)
          loaded =
            begin
              require library.to_s
              true
            rescue LoadError
              # :nocov:
              false
              # :nocov:
            end
          class_variable_set(cache_name, loaded)
        end

        class_variable_get(cache_name)
      end
    end

    # Writes msg to error_stream with puts; does nothing when error_stream is nil.
    #
    # @param msg [Object] message to print
    # @return [nil]
    def error(msg)
      stream = error_stream
      stream.puts(msg) unless stream.nil?
    end
  end
end
70
+
71
+ autoload :CSV, 'csv'
72
+ require 'matrix'
73
+ require 'forwardable'
74
+ require 'erb'
75
+ require 'date'
76
+
77
+ require 'daru_lite/version'
78
+
79
+ require 'open-uri'
80
+
81
+ require 'daru_lite/index/index'
82
+ require 'daru_lite/index/multi_index'
83
+ require 'daru_lite/index/categorical_index'
84
+
85
+ require 'daru_lite/helpers/array'
86
+ require 'daru_lite/configuration'
87
+ require 'daru_lite/vector'
88
+ require 'daru_lite/dataframe'
89
+ require 'daru_lite/monkeys'
90
+ require 'daru_lite/formatters/table'
91
+ require 'daru_lite/iruby/helpers'
92
+ require 'daru_lite/exceptions'
93
+
94
+ require 'daru_lite/core/group_by'
95
+ require 'daru_lite/core/query'
96
+ require 'daru_lite/core/merge'
97
+
98
+ require 'daru_lite/date_time/offsets'
99
+ require 'daru_lite/date_time/index'
data/profile/_base.rb ADDED
@@ -0,0 +1,23 @@
1
+ $:.unshift File.expand_path("../../lib", __FILE__)
2
+
3
+ require 'ruby-prof'
4
+ require 'fileutils'
5
+
6
+ require 'daru_lite'
7
+
# Profiles the given block with ruby-prof and writes an HTML graph report to
# out/<name>.html next to this file, creating the out/ directory if needed.
#
# @param name [String, nil] base name of the report file; when nil it is taken
#   from the file name of the calling script ("sorting" for profile/sorting.rb)
# @yield the code to be profiled
# @return [Object] whatever the printer's #print returns
def __profile__(name = nil)
  # Ask for the caller's path directly instead of splitting the caller string
  # on ':' (which breaks on paths that contain a colon), and strip only a
  # trailing ".rb" from the file name.
  name ||= File.basename(caller_locations(1, 1).first.path, '.rb')

  path = File.expand_path("../out/#{name}.html", __FILE__)
  FileUtils.mkdir_p File.dirname(path)

  RubyProf.start

  yield

  res = RubyProf.stop

  # Block form closes (and therefore flushes) the report file as soon as the
  # printer is done, instead of leaving the handle open.
  File.open(path, 'w') do |report|
    RubyProf::GraphHtmlPrinter.new(res).print(report)
  end
end
data/profile/df_to_a.rb ADDED
@@ -0,0 +1,10 @@
require_relative '_base'

# Profiles DataFrame#to_a on a frame with an :idx vector (1..row_count) and a
# :keys vector of randomly drawn values from the same range.
row_count = 40_000
key_pool = (1..row_count).to_a

idx_values = (1..row_count).to_a
random_keys = Array.new(row_count) { key_pool[Random.rand(row_count)] }

df = DaruLite::DataFrame.new(idx: idx_values, keys: random_keys)

__profile__ do
  df.to_a
end
data/profile/filter.rb ADDED
@@ -0,0 +1,13 @@
require_relative '_base'

# Profiles a row-wise DataFrame#filter over 6000 rows with a shuffled index.
repeats = 1000
columns = {
  a: (1..6).to_a * repeats,
  b: %w[a b c d e f] * repeats,
  c: [11, 22, 33, 44, 55, 66] * repeats
}
df = DaruLite::DataFrame.new(columns, index: (1..6000).to_a.shuffle)

__profile__ do
  # Keep the rows whose :a is 2 or whose :c is 55.
  df.filter(:row) { |row| row[:a] == 2 || row[:c] == 55 }
end
data/profile/joining.rb ADDED
@@ -0,0 +1,13 @@
require_relative '_base'

# Profiles an inner join on :keys between a frame of randomly drawn keys and a
# lookup frame that maps every key to key * 100.
size = 40_000
keys = (1..size).to_a

base_data = {
  idx: (1..size).to_a,
  keys: Array.new(size) { keys[Random.rand(size)] }
}
lookup_hash = keys.each_with_object({}) { |key, table| table[key] = key * 100 }

base_data_df = DaruLite::DataFrame.new(base_data)
lookup_df = DaruLite::DataFrame.new(keys: lookup_hash.keys, values: lookup_hash.values)

__profile__ do
  base_data_df.join(lookup_df, on: [:keys], how: :inner)
end
data/profile/sorting.rb ADDED
@@ -0,0 +1,12 @@
require_relative '_base'

# Profiles DataFrame#sort by :a on a frame whose three vectors are all the same
# shuffled Vector of the integers 0...10_000.
shuffled = DaruLite::Vector.new((0...10_000).to_a.shuffle)
columns = { a: shuffled, b: shuffled, c: shuffled }
df = DaruLite::DataFrame.new(columns)

__profile__ do
  df.sort([:a])
end
data/profile/vector_each_with_index.rb ADDED
@@ -0,0 +1,9 @@
require_relative '_base'

# Profiles 100 passes of Vector#each_with_index over a 6000-element vector
# with a shuffled index; the block body is intentionally empty.
letters = %w[a b c d e f] * 1000
vector = DaruLite::Vector.new(letters, index: (1..6000).to_a.shuffle)

__profile__ do
  100.times do
    vector.each_with_index { |_value, _position| }
  end
end
data/profile/vector_new.rb ADDED
@@ -0,0 +1,9 @@
require_relative '_base'

# Profiles Vector construction from a range with a string index.
# NOTE(review): the size was previously spelled 40_0000, which is 400_000; the
# sibling profile scripts use 40_000 - confirm which size is intended.
n = 400_000
idx = (1..n).map(&:to_s)

__profile__ do
  DaruLite::Vector.new(1..n, index: idx)
end
data/spec/accessors/array_wrapper_spec.rb ADDED
@@ -0,0 +1,3 @@
# Placeholder example group: it contains no examples yet.
describe(DaruLite::Accessors::ArrayWrapper) do
  # TODO: add examples.
end