daru_lite 0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (149) hide show
  1. checksums.yaml +7 -0
  2. data/.github/ISSUE_TEMPLATE.md +18 -0
  3. data/.github/workflows/ci.yml +33 -0
  4. data/.gitignore +10 -0
  5. data/.rspec +2 -0
  6. data/.rubocop.yml +27 -0
  7. data/.rubocop_todo.yml +137 -0
  8. data/CONTRIBUTING.md +47 -0
  9. data/Gemfile +2 -0
  10. data/History.md +4 -0
  11. data/LICENSE +24 -0
  12. data/README.md +218 -0
  13. data/Rakefile +69 -0
  14. data/ReleasePolicy.md +20 -0
  15. data/benchmarks/TradeoffData.csv +65 -0
  16. data/benchmarks/csv_reading.rb +22 -0
  17. data/benchmarks/dataframe_creation.rb +39 -0
  18. data/benchmarks/db_loading.rb +34 -0
  19. data/benchmarks/duplicating.rb +45 -0
  20. data/benchmarks/group_by.rb +32 -0
  21. data/benchmarks/joining.rb +52 -0
  22. data/benchmarks/row_access.rb +41 -0
  23. data/benchmarks/row_assign.rb +36 -0
  24. data/benchmarks/sorting.rb +51 -0
  25. data/benchmarks/statistics.rb +28 -0
  26. data/benchmarks/vector_access.rb +31 -0
  27. data/benchmarks/vector_assign.rb +42 -0
  28. data/benchmarks/where_clause.rb +48 -0
  29. data/benchmarks/where_vs_filter.rb +28 -0
  30. data/daru_lite.gemspec +55 -0
  31. data/images/README.md +5 -0
  32. data/images/con0.png +0 -0
  33. data/images/con1.png +0 -0
  34. data/images/init0.png +0 -0
  35. data/images/init1.png +0 -0
  36. data/images/man0.png +0 -0
  37. data/images/man1.png +0 -0
  38. data/images/man2.png +0 -0
  39. data/images/man3.png +0 -0
  40. data/images/man4.png +0 -0
  41. data/images/man5.png +0 -0
  42. data/images/man6.png +0 -0
  43. data/lib/daru_lite/accessors/array_wrapper.rb +109 -0
  44. data/lib/daru_lite/accessors/dataframe_by_row.rb +25 -0
  45. data/lib/daru_lite/accessors/mdarray_wrapper.rb +7 -0
  46. data/lib/daru_lite/category.rb +929 -0
  47. data/lib/daru_lite/configuration.rb +34 -0
  48. data/lib/daru_lite/core/group_by.rb +403 -0
  49. data/lib/daru_lite/core/merge.rb +270 -0
  50. data/lib/daru_lite/core/query.rb +109 -0
  51. data/lib/daru_lite/dataframe.rb +3080 -0
  52. data/lib/daru_lite/date_time/index.rb +569 -0
  53. data/lib/daru_lite/date_time/offsets.rb +397 -0
  54. data/lib/daru_lite/exceptions.rb +2 -0
  55. data/lib/daru_lite/extensions/which_dsl.rb +53 -0
  56. data/lib/daru_lite/formatters/table.rb +52 -0
  57. data/lib/daru_lite/helpers/array.rb +53 -0
  58. data/lib/daru_lite/index/categorical_index.rb +201 -0
  59. data/lib/daru_lite/index/index.rb +374 -0
  60. data/lib/daru_lite/index/multi_index.rb +374 -0
  61. data/lib/daru_lite/io/csv/converters.rb +21 -0
  62. data/lib/daru_lite/io/io.rb +294 -0
  63. data/lib/daru_lite/io/sql_data_source.rb +97 -0
  64. data/lib/daru_lite/iruby/helpers.rb +38 -0
  65. data/lib/daru_lite/iruby/templates/dataframe.html.erb +5 -0
  66. data/lib/daru_lite/iruby/templates/dataframe_mi.html.erb +5 -0
  67. data/lib/daru_lite/iruby/templates/dataframe_mi_tbody.html.erb +35 -0
  68. data/lib/daru_lite/iruby/templates/dataframe_mi_thead.html.erb +21 -0
  69. data/lib/daru_lite/iruby/templates/dataframe_tbody.html.erb +28 -0
  70. data/lib/daru_lite/iruby/templates/dataframe_thead.html.erb +21 -0
  71. data/lib/daru_lite/iruby/templates/multi_index.html.erb +12 -0
  72. data/lib/daru_lite/iruby/templates/vector.html.erb +5 -0
  73. data/lib/daru_lite/iruby/templates/vector_mi.html.erb +5 -0
  74. data/lib/daru_lite/iruby/templates/vector_mi_tbody.html.erb +26 -0
  75. data/lib/daru_lite/iruby/templates/vector_mi_thead.html.erb +8 -0
  76. data/lib/daru_lite/iruby/templates/vector_tbody.html.erb +17 -0
  77. data/lib/daru_lite/iruby/templates/vector_thead.html.erb +8 -0
  78. data/lib/daru_lite/maths/arithmetic/dataframe.rb +91 -0
  79. data/lib/daru_lite/maths/arithmetic/vector.rb +117 -0
  80. data/lib/daru_lite/maths/statistics/dataframe.rb +202 -0
  81. data/lib/daru_lite/maths/statistics/vector.rb +1019 -0
  82. data/lib/daru_lite/monkeys.rb +56 -0
  83. data/lib/daru_lite/vector.rb +1678 -0
  84. data/lib/daru_lite/version.rb +3 -0
  85. data/lib/daru_lite.rb +99 -0
  86. data/profile/_base.rb +23 -0
  87. data/profile/df_to_a.rb +10 -0
  88. data/profile/filter.rb +13 -0
  89. data/profile/joining.rb +13 -0
  90. data/profile/sorting.rb +12 -0
  91. data/profile/vector_each_with_index.rb +9 -0
  92. data/profile/vector_new.rb +9 -0
  93. data/spec/accessors/array_wrapper_spec.rb +3 -0
  94. data/spec/category_spec.rb +1741 -0
  95. data/spec/core/group_by_spec.rb +655 -0
  96. data/spec/core/merge_spec.rb +179 -0
  97. data/spec/core/query_spec.rb +347 -0
  98. data/spec/daru_lite_spec.rb +22 -0
  99. data/spec/dataframe_spec.rb +4330 -0
  100. data/spec/date_time/data_spec.rb +197 -0
  101. data/spec/date_time/date_time_index_helper_spec.rb +72 -0
  102. data/spec/date_time/index_spec.rb +588 -0
  103. data/spec/date_time/offsets_spec.rb +465 -0
  104. data/spec/extensions/which_dsl_spec.rb +38 -0
  105. data/spec/fixtures/bank2.dat +200 -0
  106. data/spec/fixtures/boolean_converter_test.csv +5 -0
  107. data/spec/fixtures/countries.json +7794 -0
  108. data/spec/fixtures/duplicates.csv +32 -0
  109. data/spec/fixtures/eciresults.html +394 -0
  110. data/spec/fixtures/empties.dat +2 -0
  111. data/spec/fixtures/empty_rows_test.csv +17 -0
  112. data/spec/fixtures/macau.html +3691 -0
  113. data/spec/fixtures/macd_data.csv +150 -0
  114. data/spec/fixtures/matrix_test.csv +100 -0
  115. data/spec/fixtures/moneycontrol.html +6812 -0
  116. data/spec/fixtures/music_data.tsv +2501 -0
  117. data/spec/fixtures/repeated_fields.csv +7 -0
  118. data/spec/fixtures/sales-funnel.csv +18 -0
  119. data/spec/fixtures/scientific_notation.csv +4 -0
  120. data/spec/fixtures/string_converter_test.csv +5 -0
  121. data/spec/fixtures/strings.dat +2 -0
  122. data/spec/fixtures/test_xls.xls +0 -0
  123. data/spec/fixtures/test_xls_2.xls +0 -0
  124. data/spec/fixtures/url_test.txt~ +0 -0
  125. data/spec/fixtures/valid_markup.html +62 -0
  126. data/spec/fixtures/wiki_climate.html +1243 -0
  127. data/spec/fixtures/wiki_table_info.html +631 -0
  128. data/spec/formatters/table_formatter_spec.rb +137 -0
  129. data/spec/helpers_spec.rb +8 -0
  130. data/spec/index/categorical_index_spec.rb +170 -0
  131. data/spec/index/index_spec.rb +417 -0
  132. data/spec/index/multi_index_spec.rb +680 -0
  133. data/spec/io/io_spec.rb +373 -0
  134. data/spec/io/sql_data_source_spec.rb +56 -0
  135. data/spec/iruby/dataframe_spec.rb +170 -0
  136. data/spec/iruby/helpers_spec.rb +49 -0
  137. data/spec/iruby/multi_index_spec.rb +37 -0
  138. data/spec/iruby/vector_spec.rb +105 -0
  139. data/spec/maths/arithmetic/dataframe_spec.rb +148 -0
  140. data/spec/maths/arithmetic/vector_spec.rb +165 -0
  141. data/spec/maths/statistics/dataframe_spec.rb +178 -0
  142. data/spec/maths/statistics/vector_spec.rb +756 -0
  143. data/spec/monkeys_spec.rb +42 -0
  144. data/spec/shared/vector_display_spec.rb +213 -0
  145. data/spec/spec_helper.rb +87 -0
  146. data/spec/support/database_helper.rb +30 -0
  147. data/spec/support/matchers.rb +5 -0
  148. data/spec/vector_spec.rb +2293 -0
  149. metadata +571 -0
@@ -0,0 +1,197 @@
1
+ describe DaruLite::Vector do # Specs for a Vector indexed by DaruLite::DateTimeIndex: construction, #[] lookup and #[]= assignment (presumably spec/date_time/data_spec.rb, the +197 entry in the file list — confirm)
2
+ context "#initialize" do
3
+ it "accepts DateTimeIndex in index option" do
4
+ index = DaruLite::DateTimeIndex.date_range(:start => DateTime.new(2012,2,1), periods: 100) # no :freq given; the ['2012-2-3'] == 3 expectation below implies a one-day step
5
+ vector = DaruLite::Vector.new [1,2,3,4,5]*20, index: index # 100 values to pair with the 100 periods
6
+
7
+ expect(vector.class).to eq(DaruLite::Vector)
8
+ expect(vector['2012-2-3']).to eq(3) # third position of the repeating 1..5 pattern
9
+ end
10
+ end
11
+
12
+ context "#[]" do
13
+ before do # shared fixture: a vector over a range from 2012-4-4 to 2012-4-7 with freq 'H'
14
+ index = DaruLite::DateTimeIndex.date_range(
15
+ :start => DateTime.new(2012,4,4), :end => DateTime.new(2012,4,7), freq: 'H')
16
+ @vector = DaruLite::Vector.new([23]*index.size, index: index) # every value is 23, so these examples exercise index resolution rather than data
17
+ end
18
+
19
+ it "returns the element when complete date" do
20
+ expect(@vector['2012-4-4 22:00:00']).to eq(23) # full timestamp string -> single element
21
+ end
22
+
23
+ it "accepts DateTime object for [] argument" do
24
+ expect(@vector[DateTime.new(2012,4,4,22)]).to eq(23)
25
+ end
26
+
27
+ it "returns slice when partial date" do
28
+ slice_index = DaruLite::DateTimeIndex.date_range(
29
+ :start => DateTime.new(2012,4,4), :periods => 24, freq: 'H') # expected index: 24 periods starting 2012-4-4
30
+ expect(@vector['2012-4-4']).to eq(
31
+ DaruLite::Vector.new([23]*slice_index.size, index: slice_index))
32
+ end
33
+
34
+ it "returns a slice when range" do
35
+ slice_index = DaruLite::DateTimeIndex.date_range(
36
+ :start => DateTime.new(2012,4,4), :end => DateTime.new(2012,4,5,23,), freq: 'H') # NOTE(review): stray trailing comma inside DateTime.new(...) — legal Ruby, but looks accidental
37
+ expect(@vector['2012-4-4'..'2012-4-5']).to eq( # string range; expected slice runs through 23:00 on 2012-4-5
38
+ DaruLite::Vector.new([23]*slice_index.size, index: slice_index))
39
+ end
40
+
41
+ it "returns a slice when numeric range" do
42
+ slice_index = DaruLite::DateTimeIndex.date_range(
43
+ :start => DateTime.new(2012,4,4), :periods => 20, :freq => 'H')
44
+ expect(@vector[0..19]).to eq( # integer range is treated as positions: first 20 entries
45
+ DaruLite::Vector.new([23]*slice_index.size, index: slice_index))
46
+ end
47
+
48
+ it "returns the element when number" do
49
+ expect(@vector[32]).to eq(23) # single integer -> positional lookup
50
+ end
51
+ end
52
+
53
+ context "#[]=" do
54
+ it "assigns a single element when index complete" do
55
+ index = DaruLite::DateTimeIndex.date_range(:start => '2012', :periods => 5, :freq => 'D') # start given as the string '2012'; the expectation below puts '2012-1-4' at the 4th slot, i.e. the range begins 2012-01-01
56
+ vector = DaruLite::Vector.new([1,2,3,4,5], index: index)
57
+ vector['2012-1-4'] = 666
58
+ expect(vector).to eq(DaruLite::Vector.new([1,2,3,666,5], index: index))
59
+ end
60
+
61
+ it "assigns single element when specified a number for indexing" do
62
+ index = DaruLite::DateTimeIndex.date_range(:start => '2012', :periods => 5, :freq => 'D')
63
+ vector = DaruLite::Vector.new([1,2,3,4,5], index: index)
64
+
65
+ vector[2] = 666 # positional assignment: third element
66
+ expect(vector).to eq(
67
+ DaruLite::Vector.new([1,2,666,4,5], index: index))
68
+ end
69
+
70
+ it "assigns multiple elements when index incomplete" do
71
+ index = DaruLite::DateTimeIndex.date_range(:start => '2012', :periods => 100,
72
+ :freq => 'MB') # 'MB' is presumably the month-begin offset — confirm against the offsets implementation
73
+ vector = DaruLite::Vector.new([1,2,3,4,5,6,7,8,9,10]*10, index: index)
74
+ vector['2012'] = 666 # partial date: assigns to every entry falling in 2012
75
+ arr = [666]*12 + [3,4,5,6,7,8,9,10] + [1,2,3,4,5,6,7,8,9,10]*8 # expected: first 12 entries overwritten, remaining 88 untouched
76
+ expect(vector).to eq(DaruLite::Vector.new(arr, index: index))
77
+ end
78
+ end
79
+ end
80
+
81
+ describe DaruLite::DataFrame do # Specs for a DataFrame whose row index and column order are both DateTimeIndex objects: #[], #[]=, #row[] and #row[]=
82
+ before :each do # rebuilt before every example, so the mutating specs below do not leak state
83
+ @index = DaruLite::DateTimeIndex.date_range(:start => '2012-2-1', periods: 100) # row index: 100 periods from 2012-2-1
84
+ @order = DaruLite::DateTimeIndex.new([ # column labels: two dates in 2012 and one in 2013
85
+ DateTime.new(2012,1,3),DateTime.new(2013,2,3),DateTime.new(2012,3,3)])
86
+ @a = [1,2,3,4,5]*20
87
+ @b = @a.map { |e| e*3 }
88
+ @c = @a.map(&:to_s) # string column, so expected rows mix Integer and String values
89
+ @df = DaruLite::DataFrame.new([@a, @b, @c], index: @index, order: @order)
90
+ end
91
+
92
+ context "#initialize" do
93
+ it "accepts DateTimeIndex for index and order options" do
94
+ expect(@df.index).to eq(@index)
95
+ expect(@df['2013-2-3']).to eq( # full date string resolves to the single column labelled 2013-2-3 (@b)
96
+ DaruLite::Vector.new(@b, index: @index))
97
+ end
98
+ end
99
+
100
+ context "#[]" do
101
+ it "returns one Vector when complete index" do
102
+ expect(@df['2012-3-3']).to eq(DaruLite::Vector.new(@c, index: @index))
103
+ end
104
+
105
+ it "returns a Vector when DateTime object specified" do
106
+ expect(@df[DateTime.new(2012,3,3)]).to eq(
107
+ DaruLite::Vector.new(@c, index: @index))
108
+ end
109
+
110
+ it "returns DataFrame when incomplete index" do
111
+ answer = DaruLite::DataFrame.new( # expected: only the two columns labelled in 2012 (@a and @c)
112
+ [@a, @c], index: @index, order: DaruLite::DateTimeIndex.new([
113
+ DateTime.new(2012,1,3),DateTime.new(2012,3,3)]))
114
+ expect(@df['2012']).to eq(answer)
115
+ end
116
+
117
+ it "returns Vector when single index specified as a number" do
118
+ expect(@df[1]).to eq(DaruLite::Vector.new(@b, index: @index)) # integer is positional: second column
119
+ end
120
+ end
121
+
122
+ context "#[]=" do
123
+ it "assigns one Vector when complete index" do
124
+ answer = DaruLite::DataFrame.new([@a, @b, @a], index: @index, order: @order) # third column replaced by @a
125
+ @df['2012-3-3'] = @a
126
+ expect(@df).to eq(answer)
127
+ end
128
+
129
+ it "assigns one Vector when index as DateTime object" do
130
+ answer = DaruLite::DataFrame.new([@a, @b, @a], index: @index, order: @order)
131
+ @df[DateTime.new(2012,3,3)] = @a
132
+ expect(@df).to eq(answer)
133
+ end
134
+
135
+ it "assigns multiple vectors when incomplete index" do
136
+ answer = DaruLite::DataFrame.new([@b,@b,@b], index: @index, order: @order) # '2012' matches the 1st and 3rd columns; the 2nd already holds @b
137
+ @df['2012'] = @b
138
+ expect(@df).to eq(answer)
139
+ end
140
+
141
+ it "assigns Vector when specified position index" do
142
+ answer = DaruLite::DataFrame.new([@a, @b, @a], index: @index, order: @order)
143
+ @df[2] = @a # positional: third column
144
+ expect(@df).to eq(answer)
145
+ end
146
+ end
147
+
148
+ context "#row[]" do
149
+ it "returns one row Vector when complete index" do
150
+ expect(@df.row['2012-2-1']).to eq(DaruLite::Vector.new([1,3,"1"], index: @order)) # first row: @a[0], @b[0], @c[0], indexed by the column labels
151
+ end
152
+
153
+ it "returns one row when complete DateTime specified" do
154
+ expect(@df.row[DateTime.new(2012,2,1)]).to eq(
155
+ DaruLite::Vector.new([1,3,"1"], index: @order))
156
+ end
157
+
158
+ it "returns DataFrame when incomplete index" do
159
+ range = 0..28 # 29 rows — February 2012 has 29 days (leap year)
160
+ a = @a[range]
161
+ b = @b[range]
162
+ c = @c[range]
163
+ i = DaruLite::DateTimeIndex.date_range(:start => '2012-2-1', periods: 29)
164
+ answer = DaruLite::DataFrame.new([a,b,c], index: i, order: @order)
165
+
166
+ expect(@df.row['2012-2']).to eq(answer) # partial date selects every row in that month
167
+ end
168
+
169
+ it "returns one row Vector when position index" do
170
+ expect(@df.row[2]).to eq(DaruLite::Vector.new([3,9,'3'], index: @order)) # positional: third row
171
+ end
172
+ end
173
+
174
+ context "#row[]=" do
175
+ it "assigns one row Vector when complete index" do
176
+ @df.row['2012-2-4'] = [666,999,0] # one value per column
177
+ expect(@df.row['2012-2-4']).to eq(DaruLite::Vector.new([666,999,0], index: @order))
178
+ end
179
+
180
+ it "assigns one row Vector when complete index as DateTime" do
181
+ @df.row[DateTime.new(2012,2,5)] = [1,2,3]
182
+ expect(@df.row[DateTime.new(2012,2,5)]).to eq(
183
+ DaruLite::Vector.new([1,2,3], index: @order))
184
+ end
185
+
186
+ it "assigns multiple rows when incomplete index" do
187
+ a = [666]*29 # expected columns after the same 3-value row is written to all 29 February rows
188
+ b = [999]*29
189
+ c = [0]*29
190
+ index = DaruLite::DateTimeIndex.date_range(:start => '2012-2-1', :periods => 29)
191
+ answer = DaruLite::DataFrame.new([a,b,c], index: index, order: @order)
192
+ @df.row['2012-2'] = [666,999,0] # partial date on the left-hand side: the array is applied to each matching row
193
+
194
+ expect(@df.row['2012-2']).to eq(answer)
195
+ end
196
+ end
197
+ end
@@ -0,0 +1,72 @@
1
+ include DaruLite # top-level include mixes DaruLite into Object for the whole process; the specs below still use fully qualified DaruLite:: names
2
+
3
+ describe DaruLite::DateTimeIndexHelper do # Specs for DateTimeIndexHelper.infer_offset (presumably spec/date_time/date_time_index_helper_spec.rb, the +72 entry in the file list — confirm)
4
+
5
+
6
+ describe '.infer_offset' do
7
+ subject(:offset) { DaruLite::DateTimeIndexHelper.infer_offset(data) } # each context supplies its own `data` via let
8
+
9
+ context 'when the dataset does not have a regular offset' do
10
+ let(:data) do # gaps of 1 minute, then 4 minutes
11
+ [
12
+ DateTime.new(2020, 1, 1, 00, 00, 00), # leading-zero literals are octal in Ruby; same value for 0-7, but 08/09 would be a syntax error
13
+ DateTime.new(2020, 1, 1, 00, 01, 00),
14
+ DateTime.new(2020, 1, 1, 00, 05, 00),
15
+ ]
16
+ end
17
+
18
+ it 'returns nil' do
19
+ expect(offset).to be_nil
20
+ end
21
+ end
22
+
23
+ context 'when the dataset matches a defined offset' do
24
+ let(:data) do # evenly spaced one minute apart
25
+ [
26
+ DateTime.new(2020, 1, 1, 00, 00, 00),
27
+ DateTime.new(2020, 1, 1, 00, 01, 00),
28
+ DateTime.new(2020, 1, 1, 00, 02, 00),
29
+ ]
30
+ end
31
+
32
+ it 'returns the matched offset' do
33
+ expect(offset).to be_an_instance_of(DaruLite::Offsets::Minute)
34
+ end
35
+ end
36
+
37
+ context 'when the offset is a multiple of seconds' do
38
+ let(:data) do # evenly spaced three seconds apart
39
+ [
40
+ DateTime.new(2020, 1, 1, 00, 00, 00),
41
+ DateTime.new(2020, 1, 1, 00, 00, 03),
42
+ DateTime.new(2020, 1, 1, 00, 00, 06),
43
+ ]
44
+ end
45
+
46
+ let(:expected_offset) { DaruLite::Offsets::Second.new(3) } # reference offset; compared by freq_string below
47
+
48
+ it 'returns a Second offset' do
49
+ expect(offset).to be_an_instance_of(DaruLite::Offsets::Second)
50
+ end
51
+
52
+ it 'has the correct multiplier' do
53
+ expect(offset.freq_string).to eql(expected_offset.freq_string) # compares the string form rather than the offset objects themselves
54
+ end
55
+ end
56
+
57
+ context 'when the offset is less than a second' do
58
+ let(:data) do
59
+ [
60
+ DateTime.new(2020, 1, 1, 00, 00, 00) + 0.00001, # DateTime#+ adds days, so each step is 0.00001 day (about 0.864 s)
61
+ DateTime.new(2020, 1, 1, 00, 00, 00) + 0.00002,
62
+ DateTime.new(2020, 1, 1, 00, 00, 00) + 0.00003,
63
+ ]
64
+ end
65
+
66
+ it 'returns nil' do
67
+ expect(offset).to be_nil
68
+ end
69
+ end
70
+ end
71
+
72
+ end