daru_lite 0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (149)
  1. checksums.yaml +7 -0
  2. data/.github/ISSUE_TEMPLATE.md +18 -0
  3. data/.github/workflows/ci.yml +33 -0
  4. data/.gitignore +10 -0
  5. data/.rspec +2 -0
  6. data/.rubocop.yml +27 -0
  7. data/.rubocop_todo.yml +137 -0
  8. data/CONTRIBUTING.md +47 -0
  9. data/Gemfile +2 -0
  10. data/History.md +4 -0
  11. data/LICENSE +24 -0
  12. data/README.md +218 -0
  13. data/Rakefile +69 -0
  14. data/ReleasePolicy.md +20 -0
  15. data/benchmarks/TradeoffData.csv +65 -0
  16. data/benchmarks/csv_reading.rb +22 -0
  17. data/benchmarks/dataframe_creation.rb +39 -0
  18. data/benchmarks/db_loading.rb +34 -0
  19. data/benchmarks/duplicating.rb +45 -0
  20. data/benchmarks/group_by.rb +32 -0
  21. data/benchmarks/joining.rb +52 -0
  22. data/benchmarks/row_access.rb +41 -0
  23. data/benchmarks/row_assign.rb +36 -0
  24. data/benchmarks/sorting.rb +51 -0
  25. data/benchmarks/statistics.rb +28 -0
  26. data/benchmarks/vector_access.rb +31 -0
  27. data/benchmarks/vector_assign.rb +42 -0
  28. data/benchmarks/where_clause.rb +48 -0
  29. data/benchmarks/where_vs_filter.rb +28 -0
  30. data/daru_lite.gemspec +55 -0
  31. data/images/README.md +5 -0
  32. data/images/con0.png +0 -0
  33. data/images/con1.png +0 -0
  34. data/images/init0.png +0 -0
  35. data/images/init1.png +0 -0
  36. data/images/man0.png +0 -0
  37. data/images/man1.png +0 -0
  38. data/images/man2.png +0 -0
  39. data/images/man3.png +0 -0
  40. data/images/man4.png +0 -0
  41. data/images/man5.png +0 -0
  42. data/images/man6.png +0 -0
  43. data/lib/daru_lite/accessors/array_wrapper.rb +109 -0
  44. data/lib/daru_lite/accessors/dataframe_by_row.rb +25 -0
  45. data/lib/daru_lite/accessors/mdarray_wrapper.rb +7 -0
  46. data/lib/daru_lite/category.rb +929 -0
  47. data/lib/daru_lite/configuration.rb +34 -0
  48. data/lib/daru_lite/core/group_by.rb +403 -0
  49. data/lib/daru_lite/core/merge.rb +270 -0
  50. data/lib/daru_lite/core/query.rb +109 -0
  51. data/lib/daru_lite/dataframe.rb +3080 -0
  52. data/lib/daru_lite/date_time/index.rb +569 -0
  53. data/lib/daru_lite/date_time/offsets.rb +397 -0
  54. data/lib/daru_lite/exceptions.rb +2 -0
  55. data/lib/daru_lite/extensions/which_dsl.rb +53 -0
  56. data/lib/daru_lite/formatters/table.rb +52 -0
  57. data/lib/daru_lite/helpers/array.rb +53 -0
  58. data/lib/daru_lite/index/categorical_index.rb +201 -0
  59. data/lib/daru_lite/index/index.rb +374 -0
  60. data/lib/daru_lite/index/multi_index.rb +374 -0
  61. data/lib/daru_lite/io/csv/converters.rb +21 -0
  62. data/lib/daru_lite/io/io.rb +294 -0
  63. data/lib/daru_lite/io/sql_data_source.rb +97 -0
  64. data/lib/daru_lite/iruby/helpers.rb +38 -0
  65. data/lib/daru_lite/iruby/templates/dataframe.html.erb +5 -0
  66. data/lib/daru_lite/iruby/templates/dataframe_mi.html.erb +5 -0
  67. data/lib/daru_lite/iruby/templates/dataframe_mi_tbody.html.erb +35 -0
  68. data/lib/daru_lite/iruby/templates/dataframe_mi_thead.html.erb +21 -0
  69. data/lib/daru_lite/iruby/templates/dataframe_tbody.html.erb +28 -0
  70. data/lib/daru_lite/iruby/templates/dataframe_thead.html.erb +21 -0
  71. data/lib/daru_lite/iruby/templates/multi_index.html.erb +12 -0
  72. data/lib/daru_lite/iruby/templates/vector.html.erb +5 -0
  73. data/lib/daru_lite/iruby/templates/vector_mi.html.erb +5 -0
  74. data/lib/daru_lite/iruby/templates/vector_mi_tbody.html.erb +26 -0
  75. data/lib/daru_lite/iruby/templates/vector_mi_thead.html.erb +8 -0
  76. data/lib/daru_lite/iruby/templates/vector_tbody.html.erb +17 -0
  77. data/lib/daru_lite/iruby/templates/vector_thead.html.erb +8 -0
  78. data/lib/daru_lite/maths/arithmetic/dataframe.rb +91 -0
  79. data/lib/daru_lite/maths/arithmetic/vector.rb +117 -0
  80. data/lib/daru_lite/maths/statistics/dataframe.rb +202 -0
  81. data/lib/daru_lite/maths/statistics/vector.rb +1019 -0
  82. data/lib/daru_lite/monkeys.rb +56 -0
  83. data/lib/daru_lite/vector.rb +1678 -0
  84. data/lib/daru_lite/version.rb +3 -0
  85. data/lib/daru_lite.rb +99 -0
  86. data/profile/_base.rb +23 -0
  87. data/profile/df_to_a.rb +10 -0
  88. data/profile/filter.rb +13 -0
  89. data/profile/joining.rb +13 -0
  90. data/profile/sorting.rb +12 -0
  91. data/profile/vector_each_with_index.rb +9 -0
  92. data/profile/vector_new.rb +9 -0
  93. data/spec/accessors/array_wrapper_spec.rb +3 -0
  94. data/spec/category_spec.rb +1741 -0
  95. data/spec/core/group_by_spec.rb +655 -0
  96. data/spec/core/merge_spec.rb +179 -0
  97. data/spec/core/query_spec.rb +347 -0
  98. data/spec/daru_lite_spec.rb +22 -0
  99. data/spec/dataframe_spec.rb +4330 -0
  100. data/spec/date_time/data_spec.rb +197 -0
  101. data/spec/date_time/date_time_index_helper_spec.rb +72 -0
  102. data/spec/date_time/index_spec.rb +588 -0
  103. data/spec/date_time/offsets_spec.rb +465 -0
  104. data/spec/extensions/which_dsl_spec.rb +38 -0
  105. data/spec/fixtures/bank2.dat +200 -0
  106. data/spec/fixtures/boolean_converter_test.csv +5 -0
  107. data/spec/fixtures/countries.json +7794 -0
  108. data/spec/fixtures/duplicates.csv +32 -0
  109. data/spec/fixtures/eciresults.html +394 -0
  110. data/spec/fixtures/empties.dat +2 -0
  111. data/spec/fixtures/empty_rows_test.csv +17 -0
  112. data/spec/fixtures/macau.html +3691 -0
  113. data/spec/fixtures/macd_data.csv +150 -0
  114. data/spec/fixtures/matrix_test.csv +100 -0
  115. data/spec/fixtures/moneycontrol.html +6812 -0
  116. data/spec/fixtures/music_data.tsv +2501 -0
  117. data/spec/fixtures/repeated_fields.csv +7 -0
  118. data/spec/fixtures/sales-funnel.csv +18 -0
  119. data/spec/fixtures/scientific_notation.csv +4 -0
  120. data/spec/fixtures/string_converter_test.csv +5 -0
  121. data/spec/fixtures/strings.dat +2 -0
  122. data/spec/fixtures/test_xls.xls +0 -0
  123. data/spec/fixtures/test_xls_2.xls +0 -0
  124. data/spec/fixtures/url_test.txt~ +0 -0
  125. data/spec/fixtures/valid_markup.html +62 -0
  126. data/spec/fixtures/wiki_climate.html +1243 -0
  127. data/spec/fixtures/wiki_table_info.html +631 -0
  128. data/spec/formatters/table_formatter_spec.rb +137 -0
  129. data/spec/helpers_spec.rb +8 -0
  130. data/spec/index/categorical_index_spec.rb +170 -0
  131. data/spec/index/index_spec.rb +417 -0
  132. data/spec/index/multi_index_spec.rb +680 -0
  133. data/spec/io/io_spec.rb +373 -0
  134. data/spec/io/sql_data_source_spec.rb +56 -0
  135. data/spec/iruby/dataframe_spec.rb +170 -0
  136. data/spec/iruby/helpers_spec.rb +49 -0
  137. data/spec/iruby/multi_index_spec.rb +37 -0
  138. data/spec/iruby/vector_spec.rb +105 -0
  139. data/spec/maths/arithmetic/dataframe_spec.rb +148 -0
  140. data/spec/maths/arithmetic/vector_spec.rb +165 -0
  141. data/spec/maths/statistics/dataframe_spec.rb +178 -0
  142. data/spec/maths/statistics/vector_spec.rb +756 -0
  143. data/spec/monkeys_spec.rb +42 -0
  144. data/spec/shared/vector_display_spec.rb +213 -0
  145. data/spec/spec_helper.rb +87 -0
  146. data/spec/support/database_helper.rb +30 -0
  147. data/spec/support/matchers.rb +5 -0
  148. data/spec/vector_spec.rb +2293 -0
  149. metadata +571 -0
@@ -0,0 +1,197 @@
1
# Specs for DaruLite::Vector when it is indexed by a DaruLite::DateTimeIndex:
# construction, reads through #[] and writes through #[]=.
describe DaruLite::Vector do
  context "#initialize" do
    it "accepts DateTimeIndex in index option" do
      dt_index = DaruLite::DateTimeIndex.date_range(start: DateTime.new(2012, 2, 1), periods: 100)
      values   = (1..5).to_a * 20
      vector   = DaruLite::Vector.new(values, index: dt_index)

      expect(vector.class).to eq(DaruLite::Vector)
      # The third entry of the index (2012-02-03) must map to the third value.
      expect(vector['2012-2-3']).to eq(3)
    end
  end

  context "#[]" do
    # Every element is 23, so any successful lookup returns 23 and slices can
    # be compared against a freshly built all-23 vector.
    # freq 'H' is presumably hourly (the one-day slice below spans 24 periods).
    before do
      hourly_index = DaruLite::DateTimeIndex.date_range(
        start: DateTime.new(2012, 4, 4),
        end:   DateTime.new(2012, 4, 7),
        freq:  'H'
      )
      @vector = DaruLite::Vector.new(Array.new(hourly_index.size, 23), index: hourly_index)
    end

    it "returns the element when complete date" do
      expect(@vector['2012-4-4 22:00:00']).to eq(23)
    end

    it "accepts DateTime object for [] argument" do
      timestamp = DateTime.new(2012, 4, 4, 22)

      expect(@vector[timestamp]).to eq(23)
    end

    it "returns slice when partial date" do
      # A date without a time component selects the whole day.
      day_index = DaruLite::DateTimeIndex.date_range(
        start:   DateTime.new(2012, 4, 4),
        periods: 24,
        freq:    'H'
      )
      expected = DaruLite::Vector.new(Array.new(day_index.size, 23), index: day_index)

      expect(@vector['2012-4-4']).to eq(expected)
    end

    it "returns a slice when range" do
      # The range of partial dates is expected to run through 23:00 of the
      # closing day.
      two_day_index = DaruLite::DateTimeIndex.date_range(
        start: DateTime.new(2012, 4, 4),
        end:   DateTime.new(2012, 4, 5, 23),
        freq:  'H'
      )
      expected = DaruLite::Vector.new(Array.new(two_day_index.size, 23), index: two_day_index)

      expect(@vector['2012-4-4'..'2012-4-5']).to eq(expected)
    end

    it "returns a slice when numeric range" do
      # Positions 0..19 correspond to the first 20 hourly entries.
      first_twenty = DaruLite::DateTimeIndex.date_range(
        start:   DateTime.new(2012, 4, 4),
        periods: 20,
        freq:    'H'
      )
      expected = DaruLite::Vector.new(Array.new(first_twenty.size, 23), index: first_twenty)

      expect(@vector[0..19]).to eq(expected)
    end

    it "returns the element when number" do
      expect(@vector[32]).to eq(23)
    end
  end

  context "#[]=" do
    it "assigns a single element when index complete" do
      daily_index = DaruLite::DateTimeIndex.date_range(start: '2012', periods: 5, freq: 'D')
      vector      = DaruLite::Vector.new((1..5).to_a, index: daily_index)

      vector['2012-1-4'] = 666

      expected = DaruLite::Vector.new([1, 2, 3, 666, 5], index: daily_index)
      expect(vector).to eq(expected)
    end

    it "assigns single element when specified a number for indexing" do
      daily_index = DaruLite::DateTimeIndex.date_range(start: '2012', periods: 5, freq: 'D')
      vector      = DaruLite::Vector.new((1..5).to_a, index: daily_index)

      vector[2] = 666

      expected = DaruLite::Vector.new([1, 2, 666, 4, 5], index: daily_index)
      expect(vector).to eq(expected)
    end

    it "assigns multiple elements when index incomplete" do
      # freq 'MB' is presumably month-begin: the partial key '2012' is expected
      # to cover exactly the first 12 entries.
      monthly_index = DaruLite::DateTimeIndex.date_range(start: '2012', periods: 100, freq: 'MB')
      one_to_ten    = (1..10).to_a
      vector        = DaruLite::Vector.new(one_to_ten * 10, index: monthly_index)

      vector['2012'] = 666

      # 12 overwritten entries, the untouched tail of the second 1..10 cycle,
      # then the remaining eight full cycles.
      expected_values = Array.new(12, 666) + one_to_ten.drop(2) + one_to_ten * 8
      expect(vector).to eq(DaruLite::Vector.new(expected_values, index: monthly_index))
    end
  end
end
80
+
81
# Specs for DaruLite::DataFrame when both the row index and the vector order
# are DaruLite::DateTimeIndex objects: column access (#[] / #[]=) and row
# access (#row[] / #row[]=) by full date, partial date, DateTime and position.
describe DaruLite::DataFrame do
  before :each do
    # 100 row labels starting at 2012-02-01.
    @row_index = DaruLite::DateTimeIndex.date_range(start: '2012-2-1', periods: 100)
    # Three column labels; two fall in 2012 and one in 2013, which the
    # partial-date ('2012') examples rely on.
    @column_order = DaruLite::DateTimeIndex.new(
      [DateTime.new(2012, 1, 3), DateTime.new(2013, 2, 3), DateTime.new(2012, 3, 3)]
    )
    @ints    = (1..5).to_a * 20
    @triples = @ints.map { |value| value * 3 }
    @strings = @ints.map(&:to_s)
    @df = DaruLite::DataFrame.new(
      [@ints, @triples, @strings],
      index: @row_index,
      order: @column_order
    )
  end

  context "#initialize" do
    it "accepts DateTimeIndex for index and order options" do
      expected_column = DaruLite::Vector.new(@triples, index: @row_index)

      expect(@df.index).to eq(@row_index)
      expect(@df['2013-2-3']).to eq(expected_column)
    end
  end

  context "#[]" do
    it "returns one Vector when complete index" do
      expected_column = DaruLite::Vector.new(@strings, index: @row_index)

      expect(@df['2012-3-3']).to eq(expected_column)
    end

    it "returns a Vector when DateTime object specified" do
      expected_column = DaruLite::Vector.new(@strings, index: @row_index)

      expect(@df[DateTime.new(2012, 3, 3)]).to eq(expected_column)
    end

    it "returns DataFrame when incomplete index" do
      # Only the two columns labelled with 2012 dates are expected back.
      columns_in_2012 = DaruLite::DateTimeIndex.new(
        [DateTime.new(2012, 1, 3), DateTime.new(2012, 3, 3)]
      )
      answer = DaruLite::DataFrame.new(
        [@ints, @strings],
        index: @row_index,
        order: columns_in_2012
      )

      expect(@df['2012']).to eq(answer)
    end

    it "returns Vector when single index specified as a number" do
      expected_column = DaruLite::Vector.new(@triples, index: @row_index)

      expect(@df[1]).to eq(expected_column)
    end
  end

  context "#[]=" do
    it "assigns one Vector when complete index" do
      answer = DaruLite::DataFrame.new(
        [@ints, @triples, @ints], index: @row_index, order: @column_order
      )

      @df['2012-3-3'] = @ints

      expect(@df).to eq(answer)
    end

    it "assigns one Vector when index as DateTime object" do
      answer = DaruLite::DataFrame.new(
        [@ints, @triples, @ints], index: @row_index, order: @column_order
      )

      @df[DateTime.new(2012, 3, 3)] = @ints

      expect(@df).to eq(answer)
    end

    it "assigns multiple vectors when incomplete index" do
      # Per this expectation all three columns end up equal to the assigned data.
      answer = DaruLite::DataFrame.new(
        [@triples, @triples, @triples], index: @row_index, order: @column_order
      )

      @df['2012'] = @triples

      expect(@df).to eq(answer)
    end

    it "assigns Vector when specified position index" do
      answer = DaruLite::DataFrame.new(
        [@ints, @triples, @ints], index: @row_index, order: @column_order
      )

      @df[2] = @ints

      expect(@df).to eq(answer)
    end
  end

  context "#row[]" do
    it "returns one row Vector when complete index" do
      first_row = DaruLite::Vector.new([1, 3, '1'], index: @column_order)

      expect(@df.row['2012-2-1']).to eq(first_row)
    end

    it "returns one row when complete DateTime specified" do
      first_row = DaruLite::Vector.new([1, 3, '1'], index: @column_order)

      expect(@df.row[DateTime.new(2012, 2, 1)]).to eq(first_row)
    end

    it "returns DataFrame when incomplete index" do
      # '2012-2' is expected to match the first 29 rows (February 2012 is a
      # leap-year month; assumes the default date_range step is one day).
      february_rows    = 0..28
      february_columns = [@ints, @triples, @strings].map { |column| column[february_rows] }
      february_index   = DaruLite::DateTimeIndex.date_range(start: '2012-2-1', periods: 29)
      answer = DaruLite::DataFrame.new(
        february_columns, index: february_index, order: @column_order
      )

      expect(@df.row['2012-2']).to eq(answer)
    end

    it "returns one row Vector when position index" do
      third_row = DaruLite::Vector.new([3, 9, '3'], index: @column_order)

      expect(@df.row[2]).to eq(third_row)
    end
  end

  context "#row[]=" do
    it "assigns one row Vector when complete index" do
      @df.row['2012-2-4'] = [666, 999, 0]

      expected_row = DaruLite::Vector.new([666, 999, 0], index: @column_order)
      expect(@df.row['2012-2-4']).to eq(expected_row)
    end

    it "assigns one row Vector when complete index as DateTime" do
      row_key = DateTime.new(2012, 2, 5)

      @df.row[row_key] = [1, 2, 3]

      expected_row = DaruLite::Vector.new([1, 2, 3], index: @column_order)
      expect(@df.row[DateTime.new(2012, 2, 5)]).to eq(expected_row)
    end

    it "assigns multiple rows when incomplete index" do
      # Each of the 29 matched rows is expected to receive the same
      # three-value row.
      filled_columns = [666, 999, 0].map { |value| Array.new(29, value) }
      february_index = DaruLite::DateTimeIndex.date_range(start: '2012-2-1', periods: 29)
      answer = DaruLite::DataFrame.new(
        filled_columns, index: february_index, order: @column_order
      )

      @df.row['2012-2'] = [666, 999, 0]

      expect(@df.row['2012-2']).to eq(answer)
    end
  end
end
@@ -0,0 +1,72 @@
1
+ include DaruLite
2
+
3
# Specs for DaruLite::DateTimeIndexHelper.infer_offset, which is given an
# array of DateTime values and is expected to return either an offset object
# or nil.
describe DaruLite::DateTimeIndexHelper do
  describe '.infer_offset' do
    subject(:offset) { described_class.infer_offset(data) }

    context 'when the dataset does not have a regular offset' do
      # Gaps of 1 minute and then 4 minutes.
      let(:data) do
        [0, 1, 5].map { |minute| DateTime.new(2020, 1, 1, 0, minute, 0) }
      end

      it 'returns nil' do
        expect(offset).to be_nil
      end
    end

    context 'when the dataset matches a defined offset' do
      # Evenly spaced, one minute apart.
      let(:data) do
        [0, 1, 2].map { |minute| DateTime.new(2020, 1, 1, 0, minute, 0) }
      end

      it 'returns the matched offset' do
        expect(offset).to be_an_instance_of(DaruLite::Offsets::Minute)
      end
    end

    context 'when the offset is a multiple of seconds' do
      # Evenly spaced, three seconds apart.
      let(:data) do
        [0, 3, 6].map { |second| DateTime.new(2020, 1, 1, 0, 0, second) }
      end

      let(:expected_offset) { DaruLite::Offsets::Second.new(3) }

      it 'returns a Second offset' do
        expect(offset).to be_an_instance_of(DaruLite::Offsets::Second)
      end

      it 'has the correct multiplier' do
        expect(offset.freq_string).to eql(expected_offset.freq_string)
      end
    end

    context 'when the offset is less than a second' do
      # DateTime#+ takes a number of days, so each step of 0.00001 day is
      # 0.864 seconds.
      let(:data) do
        [0.00001, 0.00002, 0.00003].map do |fraction_of_day|
          DateTime.new(2020, 1, 1, 0, 0, 0) + fraction_of_day
        end
      end

      it 'returns nil' do
        expect(offset).to be_nil
      end
    end
  end
end