daru_lite 0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.github/ISSUE_TEMPLATE.md +18 -0
- data/.github/workflows/ci.yml +33 -0
- data/.gitignore +10 -0
- data/.rspec +2 -0
- data/.rubocop.yml +27 -0
- data/.rubocop_todo.yml +137 -0
- data/CONTRIBUTING.md +47 -0
- data/Gemfile +2 -0
- data/History.md +4 -0
- data/LICENSE +24 -0
- data/README.md +218 -0
- data/Rakefile +69 -0
- data/ReleasePolicy.md +20 -0
- data/benchmarks/TradeoffData.csv +65 -0
- data/benchmarks/csv_reading.rb +22 -0
- data/benchmarks/dataframe_creation.rb +39 -0
- data/benchmarks/db_loading.rb +34 -0
- data/benchmarks/duplicating.rb +45 -0
- data/benchmarks/group_by.rb +32 -0
- data/benchmarks/joining.rb +52 -0
- data/benchmarks/row_access.rb +41 -0
- data/benchmarks/row_assign.rb +36 -0
- data/benchmarks/sorting.rb +51 -0
- data/benchmarks/statistics.rb +28 -0
- data/benchmarks/vector_access.rb +31 -0
- data/benchmarks/vector_assign.rb +42 -0
- data/benchmarks/where_clause.rb +48 -0
- data/benchmarks/where_vs_filter.rb +28 -0
- data/daru_lite.gemspec +55 -0
- data/images/README.md +5 -0
- data/images/con0.png +0 -0
- data/images/con1.png +0 -0
- data/images/init0.png +0 -0
- data/images/init1.png +0 -0
- data/images/man0.png +0 -0
- data/images/man1.png +0 -0
- data/images/man2.png +0 -0
- data/images/man3.png +0 -0
- data/images/man4.png +0 -0
- data/images/man5.png +0 -0
- data/images/man6.png +0 -0
- data/lib/daru_lite/accessors/array_wrapper.rb +109 -0
- data/lib/daru_lite/accessors/dataframe_by_row.rb +25 -0
- data/lib/daru_lite/accessors/mdarray_wrapper.rb +7 -0
- data/lib/daru_lite/category.rb +929 -0
- data/lib/daru_lite/configuration.rb +34 -0
- data/lib/daru_lite/core/group_by.rb +403 -0
- data/lib/daru_lite/core/merge.rb +270 -0
- data/lib/daru_lite/core/query.rb +109 -0
- data/lib/daru_lite/dataframe.rb +3080 -0
- data/lib/daru_lite/date_time/index.rb +569 -0
- data/lib/daru_lite/date_time/offsets.rb +397 -0
- data/lib/daru_lite/exceptions.rb +2 -0
- data/lib/daru_lite/extensions/which_dsl.rb +53 -0
- data/lib/daru_lite/formatters/table.rb +52 -0
- data/lib/daru_lite/helpers/array.rb +53 -0
- data/lib/daru_lite/index/categorical_index.rb +201 -0
- data/lib/daru_lite/index/index.rb +374 -0
- data/lib/daru_lite/index/multi_index.rb +374 -0
- data/lib/daru_lite/io/csv/converters.rb +21 -0
- data/lib/daru_lite/io/io.rb +294 -0
- data/lib/daru_lite/io/sql_data_source.rb +97 -0
- data/lib/daru_lite/iruby/helpers.rb +38 -0
- data/lib/daru_lite/iruby/templates/dataframe.html.erb +5 -0
- data/lib/daru_lite/iruby/templates/dataframe_mi.html.erb +5 -0
- data/lib/daru_lite/iruby/templates/dataframe_mi_tbody.html.erb +35 -0
- data/lib/daru_lite/iruby/templates/dataframe_mi_thead.html.erb +21 -0
- data/lib/daru_lite/iruby/templates/dataframe_tbody.html.erb +28 -0
- data/lib/daru_lite/iruby/templates/dataframe_thead.html.erb +21 -0
- data/lib/daru_lite/iruby/templates/multi_index.html.erb +12 -0
- data/lib/daru_lite/iruby/templates/vector.html.erb +5 -0
- data/lib/daru_lite/iruby/templates/vector_mi.html.erb +5 -0
- data/lib/daru_lite/iruby/templates/vector_mi_tbody.html.erb +26 -0
- data/lib/daru_lite/iruby/templates/vector_mi_thead.html.erb +8 -0
- data/lib/daru_lite/iruby/templates/vector_tbody.html.erb +17 -0
- data/lib/daru_lite/iruby/templates/vector_thead.html.erb +8 -0
- data/lib/daru_lite/maths/arithmetic/dataframe.rb +91 -0
- data/lib/daru_lite/maths/arithmetic/vector.rb +117 -0
- data/lib/daru_lite/maths/statistics/dataframe.rb +202 -0
- data/lib/daru_lite/maths/statistics/vector.rb +1019 -0
- data/lib/daru_lite/monkeys.rb +56 -0
- data/lib/daru_lite/vector.rb +1678 -0
- data/lib/daru_lite/version.rb +3 -0
- data/lib/daru_lite.rb +99 -0
- data/profile/_base.rb +23 -0
- data/profile/df_to_a.rb +10 -0
- data/profile/filter.rb +13 -0
- data/profile/joining.rb +13 -0
- data/profile/sorting.rb +12 -0
- data/profile/vector_each_with_index.rb +9 -0
- data/profile/vector_new.rb +9 -0
- data/spec/accessors/array_wrapper_spec.rb +3 -0
- data/spec/category_spec.rb +1741 -0
- data/spec/core/group_by_spec.rb +655 -0
- data/spec/core/merge_spec.rb +179 -0
- data/spec/core/query_spec.rb +347 -0
- data/spec/daru_lite_spec.rb +22 -0
- data/spec/dataframe_spec.rb +4330 -0
- data/spec/date_time/data_spec.rb +197 -0
- data/spec/date_time/date_time_index_helper_spec.rb +72 -0
- data/spec/date_time/index_spec.rb +588 -0
- data/spec/date_time/offsets_spec.rb +465 -0
- data/spec/extensions/which_dsl_spec.rb +38 -0
- data/spec/fixtures/bank2.dat +200 -0
- data/spec/fixtures/boolean_converter_test.csv +5 -0
- data/spec/fixtures/countries.json +7794 -0
- data/spec/fixtures/duplicates.csv +32 -0
- data/spec/fixtures/eciresults.html +394 -0
- data/spec/fixtures/empties.dat +2 -0
- data/spec/fixtures/empty_rows_test.csv +17 -0
- data/spec/fixtures/macau.html +3691 -0
- data/spec/fixtures/macd_data.csv +150 -0
- data/spec/fixtures/matrix_test.csv +100 -0
- data/spec/fixtures/moneycontrol.html +6812 -0
- data/spec/fixtures/music_data.tsv +2501 -0
- data/spec/fixtures/repeated_fields.csv +7 -0
- data/spec/fixtures/sales-funnel.csv +18 -0
- data/spec/fixtures/scientific_notation.csv +4 -0
- data/spec/fixtures/string_converter_test.csv +5 -0
- data/spec/fixtures/strings.dat +2 -0
- data/spec/fixtures/test_xls.xls +0 -0
- data/spec/fixtures/test_xls_2.xls +0 -0
- data/spec/fixtures/url_test.txt~ +0 -0
- data/spec/fixtures/valid_markup.html +62 -0
- data/spec/fixtures/wiki_climate.html +1243 -0
- data/spec/fixtures/wiki_table_info.html +631 -0
- data/spec/formatters/table_formatter_spec.rb +137 -0
- data/spec/helpers_spec.rb +8 -0
- data/spec/index/categorical_index_spec.rb +170 -0
- data/spec/index/index_spec.rb +417 -0
- data/spec/index/multi_index_spec.rb +680 -0
- data/spec/io/io_spec.rb +373 -0
- data/spec/io/sql_data_source_spec.rb +56 -0
- data/spec/iruby/dataframe_spec.rb +170 -0
- data/spec/iruby/helpers_spec.rb +49 -0
- data/spec/iruby/multi_index_spec.rb +37 -0
- data/spec/iruby/vector_spec.rb +105 -0
- data/spec/maths/arithmetic/dataframe_spec.rb +148 -0
- data/spec/maths/arithmetic/vector_spec.rb +165 -0
- data/spec/maths/statistics/dataframe_spec.rb +178 -0
- data/spec/maths/statistics/vector_spec.rb +756 -0
- data/spec/monkeys_spec.rb +42 -0
- data/spec/shared/vector_display_spec.rb +213 -0
- data/spec/spec_helper.rb +87 -0
- data/spec/support/database_helper.rb +30 -0
- data/spec/support/matchers.rb +5 -0
- data/spec/vector_spec.rb +2293 -0
- metadata +571 -0
@@ -0,0 +1,109 @@
|
|
1
|
+
module DaruLite
|
2
|
+
module Core
|
3
|
+
module Query
|
4
|
+
# Thin wrapper around an Array of truthy/falsy flags that supports
# element-wise boolean algebra (`&`, `|`, `!`) between wrappers.
class BoolArray
  # @return [Array] the wrapped flags, exactly as passed to #initialize
  attr_reader :barry

  # @param barry [Array] flags to wrap; stored as-is (not copied)
  def initialize(barry)
    @barry = barry
  end

  # Element-wise logical AND.
  #
  # Elements are paired with Array#zip, so when +other+ is shorter the
  # missing right-hand values are nil.
  #
  # @param other [#barry] another flag wrapper
  # @return [BoolArray]
  def &(other)
    BoolArray.new(@barry.zip(other.barry).map { |b, o| b && o })
  end

  alias and &

  # Element-wise logical OR (same pairing rules as #&).
  #
  # @param other [#barry] another flag wrapper
  # @return [BoolArray]
  def |(other)
    BoolArray.new(@barry.zip(other.barry).map { |b, o| b || o })
  end

  alias or |

  # Element-wise negation.
  #
  # @return [BoolArray]
  def !
    BoolArray.new(@barry.map(&:!))
  end

  # Two wrappers are equal when their wrapped arrays are equal.
  #
  # Anything that does not expose #barry (nil, a plain Array, ...) is
  # simply not equal, rather than raising NoMethodError.
  #
  # @param other [Object]
  # @return [Boolean]
  def ==(other)
    other.respond_to?(:barry) && @barry == other.barry
  end

  # @return [Array] the wrapped flags (same object as #barry)
  def to_a
    @barry
  end

  # @return [String] debugging representation
  def inspect
    "#<#{self.class}:#{object_id} bool_arry=#{@barry}>"
  end
end
|
39
|
+
|
40
|
+
class << self
|
41
|
+
# Compares every element of +data+ against a single value.
#
# Each element that responds to +operator+ is sent
# `operator(other)` and the result is coerced to true/false; elements
# that do not respond to it produce nil.
#
# @param operator [Symbol] method name to send to each element
# @param data [#map] collection of elements to test
# @param other [Object] right-hand operand shared by every comparison
# @return [BoolArray] one flag (true, false or nil) per element
def apply_scalar_operator(operator, data, other)
  flags = data.map do |element|
    next unless element.respond_to?(operator)

    !!element.send(operator, other)
  end

  BoolArray.new(flags)
end
|
44
|
+
|
45
|
+
# Compares two collections element by element.
#
# Elements are paired with `vector.zip(other)`. Each left-hand element
# that responds to +operator+ is sent `operator(right)` and the result
# is coerced to true/false. A left-hand element that does not respond
# to +operator+ (for example a nil placeholder compared with `:>`)
# produces nil instead of raising NoMethodError, matching the handling
# of unsupported elements in scalar comparisons.
#
# @param operator [Symbol] method name to send to each left-hand element
# @param vector [#zip] left-hand operands
# @param other [#each] right-hand operands, paired positionally
# @return [BoolArray] one flag (true, false or nil) per left-hand element
def apply_vector_operator(operator, vector, other)
  BoolArray.new(
    vector.zip(other).map { |d, o| !!d.send(operator, o) if d.respond_to?(operator) }
  )
end
|
48
|
+
|
49
|
+
# Filters a data frame with a boolean mask.
#
# Every element yielded by `data_frame.map` has `where(bool_array)`
# called on it, and the results are assembled into a new
# DaruLite::DataFrame. The new frame keeps the original vector order
# and takes its index from the first filtered vector.
#
# NOTE(review): `filtered[0]` is nil when `data_frame.map` yields
# nothing, so a frame without vectors would raise here -- confirm
# whether callers can pass one.
#
# @param data_frame [#map, #vectors] frame to filter
# @param bool_array [Object] mask forwarded unchanged to each `where`
# @return [DaruLite::DataFrame]
def df_where(data_frame, bool_array)
  filtered = data_frame.map { |column| column.where(bool_array) }

  DaruLite::DataFrame.new(filtered, order: data_frame.vectors, index: filtered[0].index, clone: false)
end
|
58
|
+
|
59
|
+
# Filters a vector with a boolean mask.
#
# The selected values and their index labels come from
# fetch_new_data_and_index. The result is a new DaruLite::Vector that
# reuses the source vector's index class, dtype, type and name.
#
# @param dv [DaruLite::Vector] vector to filter
# @param bool_array [#to_a] mask; truthy entries mark positions to keep
# @return [DaruLite::Vector]
def vector_where(dv, bool_array)
  kept_values, kept_labels = fetch_new_data_and_index(dv, bool_array)

  DaruLite::Vector.new(
    kept_values,
    index: dv.index.class.new(kept_labels),
    dtype: dv.dtype,
    type: dv.type,
    name: dv.name
  ).tap do |filtered|
    # Preserve categories order for category vector
    filtered.categories = dv.categories if dv.category?
  end
end
|
72
|
+
|
73
|
+
# Rebuilds a vector, passing the values selected by a boolean mask
# through the given block and leaving every other value untouched.
#
# The labels selected by the mask come from fetch_new_data_and_index.
# Every label of `dv.index` is then visited: if it is one of the
# selected labels the block's result for `dv[label]` is used,
# otherwise `dv[label]` itself. The result is a new DaruLite::Vector
# over the full index, reusing the source's index class, dtype, type
# and name.
#
# @param dv [DaruLite::Vector] source vector
# @param bool_array [#to_a] mask; truthy entries mark values to transform
# @yieldparam value [Object] a selected value (`dv[label]`)
# @yieldreturn [Object] replacement value
# @return [DaruLite::Vector]
def vector_apply_where(dv, bool_array)
  _kept_values, kept_labels = fetch_new_data_and_index(dv, bool_array)

  # Hash membership keeps the per-label check constant-time; scanning the
  # label Array with include? for every label made this quadratic.
  selected = kept_labels.each_with_object({}) { |label, seen| seen[label] = true }

  all_index = dv.index
  all_data = all_index.map { |idx| selected.key?(idx) ? yield(dv[idx]) : dv[idx] }

  resultant_dv = DaruLite::Vector.new(
    all_data,
    index: dv.index.class.new(all_index),
    dtype: dv.dtype,
    type: dv.type,
    name: dv.name
  )

  # Preserve categories order for category vector
  resultant_dv.categories = dv.categories if dv.category?
  resultant_dv
end
|
88
|
+
|
89
|
+
private
|
90
|
+
|
91
|
+
# Picks out the values and index labels at the positions where the
# mask is truthy.
#
# Positions 0...dv.size are tested against `bool_array.to_a`; a mask
# shorter than the vector leaves the trailing positions unselected
# (their mask entry is nil).
#
# @param dv [#size, #type, #to_a, #data, #index] source vector
# @param bool_array [#to_a] mask of truthy/falsy flags
# @return [Array(Array, Array)] selected values and selected labels
def fetch_new_data_and_index(dv, bool_array)
  mask = bool_array.to_a
  kept = (0...dv.size).select { |pos| mask[pos] }

  # FIXME: The per-position lookups below should be replaced with
  #   values_at when the stack error is fixed in Ruby.
  #   eg - new_data = dv.data.data.values_at(*positions)

  # Non-category vectors read the backing store directly (optimization).
  values = if dv.type == :category
             dv.to_a
           else
             dv.data.data
           end
  labels = dv.index.to_a

  [kept.map { |pos| values[pos] }, kept.map { |pos| labels[pos] }]
end
|
106
|
+
end
|
107
|
+
end
|
108
|
+
end
|
109
|
+
end
|