daru_lite 0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.github/ISSUE_TEMPLATE.md +18 -0
- data/.github/workflows/ci.yml +33 -0
- data/.gitignore +10 -0
- data/.rspec +2 -0
- data/.rubocop.yml +27 -0
- data/.rubocop_todo.yml +137 -0
- data/CONTRIBUTING.md +47 -0
- data/Gemfile +2 -0
- data/History.md +4 -0
- data/LICENSE +24 -0
- data/README.md +218 -0
- data/Rakefile +69 -0
- data/ReleasePolicy.md +20 -0
- data/benchmarks/TradeoffData.csv +65 -0
- data/benchmarks/csv_reading.rb +22 -0
- data/benchmarks/dataframe_creation.rb +39 -0
- data/benchmarks/db_loading.rb +34 -0
- data/benchmarks/duplicating.rb +45 -0
- data/benchmarks/group_by.rb +32 -0
- data/benchmarks/joining.rb +52 -0
- data/benchmarks/row_access.rb +41 -0
- data/benchmarks/row_assign.rb +36 -0
- data/benchmarks/sorting.rb +51 -0
- data/benchmarks/statistics.rb +28 -0
- data/benchmarks/vector_access.rb +31 -0
- data/benchmarks/vector_assign.rb +42 -0
- data/benchmarks/where_clause.rb +48 -0
- data/benchmarks/where_vs_filter.rb +28 -0
- data/daru_lite.gemspec +55 -0
- data/images/README.md +5 -0
- data/images/con0.png +0 -0
- data/images/con1.png +0 -0
- data/images/init0.png +0 -0
- data/images/init1.png +0 -0
- data/images/man0.png +0 -0
- data/images/man1.png +0 -0
- data/images/man2.png +0 -0
- data/images/man3.png +0 -0
- data/images/man4.png +0 -0
- data/images/man5.png +0 -0
- data/images/man6.png +0 -0
- data/lib/daru_lite/accessors/array_wrapper.rb +109 -0
- data/lib/daru_lite/accessors/dataframe_by_row.rb +25 -0
- data/lib/daru_lite/accessors/mdarray_wrapper.rb +7 -0
- data/lib/daru_lite/category.rb +929 -0
- data/lib/daru_lite/configuration.rb +34 -0
- data/lib/daru_lite/core/group_by.rb +403 -0
- data/lib/daru_lite/core/merge.rb +270 -0
- data/lib/daru_lite/core/query.rb +109 -0
- data/lib/daru_lite/dataframe.rb +3080 -0
- data/lib/daru_lite/date_time/index.rb +569 -0
- data/lib/daru_lite/date_time/offsets.rb +397 -0
- data/lib/daru_lite/exceptions.rb +2 -0
- data/lib/daru_lite/extensions/which_dsl.rb +53 -0
- data/lib/daru_lite/formatters/table.rb +52 -0
- data/lib/daru_lite/helpers/array.rb +53 -0
- data/lib/daru_lite/index/categorical_index.rb +201 -0
- data/lib/daru_lite/index/index.rb +374 -0
- data/lib/daru_lite/index/multi_index.rb +374 -0
- data/lib/daru_lite/io/csv/converters.rb +21 -0
- data/lib/daru_lite/io/io.rb +294 -0
- data/lib/daru_lite/io/sql_data_source.rb +97 -0
- data/lib/daru_lite/iruby/helpers.rb +38 -0
- data/lib/daru_lite/iruby/templates/dataframe.html.erb +5 -0
- data/lib/daru_lite/iruby/templates/dataframe_mi.html.erb +5 -0
- data/lib/daru_lite/iruby/templates/dataframe_mi_tbody.html.erb +35 -0
- data/lib/daru_lite/iruby/templates/dataframe_mi_thead.html.erb +21 -0
- data/lib/daru_lite/iruby/templates/dataframe_tbody.html.erb +28 -0
- data/lib/daru_lite/iruby/templates/dataframe_thead.html.erb +21 -0
- data/lib/daru_lite/iruby/templates/multi_index.html.erb +12 -0
- data/lib/daru_lite/iruby/templates/vector.html.erb +5 -0
- data/lib/daru_lite/iruby/templates/vector_mi.html.erb +5 -0
- data/lib/daru_lite/iruby/templates/vector_mi_tbody.html.erb +26 -0
- data/lib/daru_lite/iruby/templates/vector_mi_thead.html.erb +8 -0
- data/lib/daru_lite/iruby/templates/vector_tbody.html.erb +17 -0
- data/lib/daru_lite/iruby/templates/vector_thead.html.erb +8 -0
- data/lib/daru_lite/maths/arithmetic/dataframe.rb +91 -0
- data/lib/daru_lite/maths/arithmetic/vector.rb +117 -0
- data/lib/daru_lite/maths/statistics/dataframe.rb +202 -0
- data/lib/daru_lite/maths/statistics/vector.rb +1019 -0
- data/lib/daru_lite/monkeys.rb +56 -0
- data/lib/daru_lite/vector.rb +1678 -0
- data/lib/daru_lite/version.rb +3 -0
- data/lib/daru_lite.rb +99 -0
- data/profile/_base.rb +23 -0
- data/profile/df_to_a.rb +10 -0
- data/profile/filter.rb +13 -0
- data/profile/joining.rb +13 -0
- data/profile/sorting.rb +12 -0
- data/profile/vector_each_with_index.rb +9 -0
- data/profile/vector_new.rb +9 -0
- data/spec/accessors/array_wrapper_spec.rb +3 -0
- data/spec/category_spec.rb +1741 -0
- data/spec/core/group_by_spec.rb +655 -0
- data/spec/core/merge_spec.rb +179 -0
- data/spec/core/query_spec.rb +347 -0
- data/spec/daru_lite_spec.rb +22 -0
- data/spec/dataframe_spec.rb +4330 -0
- data/spec/date_time/data_spec.rb +197 -0
- data/spec/date_time/date_time_index_helper_spec.rb +72 -0
- data/spec/date_time/index_spec.rb +588 -0
- data/spec/date_time/offsets_spec.rb +465 -0
- data/spec/extensions/which_dsl_spec.rb +38 -0
- data/spec/fixtures/bank2.dat +200 -0
- data/spec/fixtures/boolean_converter_test.csv +5 -0
- data/spec/fixtures/countries.json +7794 -0
- data/spec/fixtures/duplicates.csv +32 -0
- data/spec/fixtures/eciresults.html +394 -0
- data/spec/fixtures/empties.dat +2 -0
- data/spec/fixtures/empty_rows_test.csv +17 -0
- data/spec/fixtures/macau.html +3691 -0
- data/spec/fixtures/macd_data.csv +150 -0
- data/spec/fixtures/matrix_test.csv +100 -0
- data/spec/fixtures/moneycontrol.html +6812 -0
- data/spec/fixtures/music_data.tsv +2501 -0
- data/spec/fixtures/repeated_fields.csv +7 -0
- data/spec/fixtures/sales-funnel.csv +18 -0
- data/spec/fixtures/scientific_notation.csv +4 -0
- data/spec/fixtures/string_converter_test.csv +5 -0
- data/spec/fixtures/strings.dat +2 -0
- data/spec/fixtures/test_xls.xls +0 -0
- data/spec/fixtures/test_xls_2.xls +0 -0
- data/spec/fixtures/url_test.txt~ +0 -0
- data/spec/fixtures/valid_markup.html +62 -0
- data/spec/fixtures/wiki_climate.html +1243 -0
- data/spec/fixtures/wiki_table_info.html +631 -0
- data/spec/formatters/table_formatter_spec.rb +137 -0
- data/spec/helpers_spec.rb +8 -0
- data/spec/index/categorical_index_spec.rb +170 -0
- data/spec/index/index_spec.rb +417 -0
- data/spec/index/multi_index_spec.rb +680 -0
- data/spec/io/io_spec.rb +373 -0
- data/spec/io/sql_data_source_spec.rb +56 -0
- data/spec/iruby/dataframe_spec.rb +170 -0
- data/spec/iruby/helpers_spec.rb +49 -0
- data/spec/iruby/multi_index_spec.rb +37 -0
- data/spec/iruby/vector_spec.rb +105 -0
- data/spec/maths/arithmetic/dataframe_spec.rb +148 -0
- data/spec/maths/arithmetic/vector_spec.rb +165 -0
- data/spec/maths/statistics/dataframe_spec.rb +178 -0
- data/spec/maths/statistics/vector_spec.rb +756 -0
- data/spec/monkeys_spec.rb +42 -0
- data/spec/shared/vector_display_spec.rb +213 -0
- data/spec/spec_helper.rb +87 -0
- data/spec/support/database_helper.rb +30 -0
- data/spec/support/matchers.rb +5 -0
- data/spec/vector_spec.rb +2293 -0
- metadata +571 -0
@@ -0,0 +1,201 @@
|
|
1
|
+
module DaruLite
  # Index whose values (categories) may occur more than once.
  #
  # Internally it keeps two structures:
  # * @cat_hash - maps every category to the array of positions it occupies
  # * @array    - for every position, the integer code of its category
  #               (the code is the category's rank in @cat_hash.keys)
  class CategoricalIndex < Index
    # Create a categorical index object.
    # @param indexes [Array<object>] array of indexes
    # @return [DaruLite::CategoricalIndex] categorical index
    # @example
    #   DaruLite::CategoricalIndex.new [:a, 1, :a, 1, :c]
    #   # => #<DaruLite::CategoricalIndex(5): {a, 1, a, 1, c}>
    def initialize(indexes)
      # Create a hash to map each category to positional indexes
      categories = indexes.each_with_index.group_by(&:first)
      @cat_hash = categories.transform_values { |group| group.map(&:last) }

      # Map each category to a unique integer for effective storage in @array
      map_cat_int = categories.keys.each_with_index.to_h

      # To link every instance to its category,
      # it stores integer for every instance representing its category.
      # A plain map is used instead of `values_at(*indexes)` so the whole
      # index is never splatted into a single argument list.
      @array = indexes.map { |index| map_cat_int[index] }
    end

    # Duplicates the index object and returns it
    # @return [DaruLite::CategoricalIndex] duplicated index object
    def dup
      # TODO: improve it by initializing index by hash
      DaruLite::CategoricalIndex.new to_a
    end

    # Returns true if the given value is one of the categories
    # @param index [object] the index value to look for
    # @return [true, false] true if index is included, false otherwise
    def include?(index)
      @cat_hash.include? index
    end

    # Returns array of categories
    # @example
    #   x = DaruLite::CategoricalIndex.new [:a, 1, :a, 1, :c]
    #   x.categories
    #   # => [:a, 1, :c]
    def categories
      @cat_hash.keys
    end

    # Returns positions given categories or positions
    # @note If an argument is not a valid category it is treated as a
    #   position and returned as it is, provided it lies within
    #   -size...size (the same bounds Index#pos enforces).
    # @param indexes [Array<object>] categories or positions
    # @return [Integer, Array<Integer>] a single position when exactly one
    #   position results, otherwise the sorted array of positions
    # @raise [IndexError] if an argument is neither a category nor a
    #   position inside the index
    # @example
    #   x = DaruLite::CategoricalIndex.new [:a, 1, :a, 1, :c]
    #   x.pos :a, 1
    #   # => [0, 1, 2, 3]
    def pos(*indexes)
      positions = indexes.flat_map do |index|
        if include? index
          @cat_hash[index]
        elsif index.is_a?(Numeric) && index < @array.size && index >= -@array.size
          index
        else
          raise IndexError, "#{index.inspect} is neither a valid category nor a valid position"
        end
      end

      positions.size == 1 ? positions.first : positions.sort
    end

    # Returns index value from position
    # @param pos [Integer] the position to look for
    # @return [object] category corresponding to position
    # @example
    #   idx = DaruLite::CategoricalIndex.new [:a, :b, :a, :b, :c]
    #   idx.index_from_pos 1
    #   # => :b
    def index_from_pos(pos)
      cat_from_int @array[pos]
    end

    # Returns enumerator enumerating all index values in the order they occur
    # @return [Enumerator] all index values
    # @example
    #   idx = DaruLite::CategoricalIndex.new [:a, :a, :b]
    #   idx.each.to_a
    #   # => [:a, :a, :b]
    def each
      return enum_for(:each) unless block_given?

      # Build the code -> category table once instead of once per element
      cats = categories
      @array.each { |code| yield cats[code] }
      self
    end

    # Compares two index objects. Returns true if every instance of category
    # occurs at the same position
    # @param other [DaruLite::CategoricalIndex] index object to be checked against
    # @return [true, false] true if other is similar to self
    # @example
    #   a = DaruLite::CategoricalIndex.new [:a, :a, :b]
    #   b = DaruLite::CategoricalIndex.new [:b, :a, :a]
    #   a == b
    #   # => false
    def ==(other)
      self.class == other.class &&
        size == other.size &&
        to_h == other.to_h
    end

    # Returns all the index values
    # @return [Array] all index values
    # @example
    #   idx = DaruLite::CategoricalIndex.new [:a, :b, :a]
    #   idx.to_a
    #   # => [:a, :b, :a]
    def to_a
      each.to_a
    end

    # Returns hash table mapping category to positions at which they occur
    # @return [Hash] hash table mapping category to array of positions
    # @example
    #   idx = DaruLite::CategoricalIndex.new [:a, :b, :a]
    #   idx.to_h
    #   # => {:a=>[0, 2], :b=>[1]}
    def to_h
      @cat_hash
    end

    # Returns size of the index object
    # @return [Integer] total number of instances of all categories
    # @example
    #   idx = DaruLite::CategoricalIndex.new [:a, :b, :a]
    #   idx.size
    #   # => 3
    def size
      @array.size
    end

    # Returns true if index object is storing no category
    # @return [true, false] true if index object is empty
    # @example
    #   i = DaruLite::CategoricalIndex.new []
    #   # => #<DaruLite::CategoricalIndex(0): {}>
    #   i.empty?
    #   # => true
    def empty?
      @array.empty?
    end

    # Return subset given categories or positions
    # @param indexes [Array<object>] categories or positions
    # @return [DaruLite::CategoricalIndex] subset of the self containing the
    #   mentioned categories or positions
    # @raise [IndexError] if an argument is neither a category nor a position
    # @example
    #   idx = DaruLite::CategoricalIndex.new [:a, :b, :a, :b, :c]
    #   idx.subset :a, :b
    #   # => #<DaruLite::CategoricalIndex(4): {a, b, a, b}>
    def subset(*indexes)
      # `pos` returns a bare Integer when exactly one position matches,
      # so normalise to an array before mapping.
      positions = Array(pos(*indexes))
      new_index = positions.map { |position| index_from_pos position }

      DaruLite::CategoricalIndex.new new_index.flatten
    end

    # Takes positional values and returns subset of the self
    # capturing the categories at mentioned positions
    # @param positions [Array<Integer>] positional values
    # @return [object] the category itself for a single Integer position,
    #   otherwise a new DaruLite::CategoricalIndex
    # @example
    #   idx = DaruLite::CategoricalIndex.new [:a, :b, :a, :b, :c]
    #   idx.at 0, 1
    #   # => #<DaruLite::CategoricalIndex(2): {a, b}>
    def at(*positions)
      positions = preprocess_positions(*positions)
      validate_positions(*positions)
      if positions.is_a? Integer
        index_from_pos(positions)
      else
        DaruLite::CategoricalIndex.new(positions.map { |p| index_from_pos(p) })
      end
    end

    # Add specified index values to the index object
    # @param indexes [Array<object>] index values to add
    # @return [DaruLite::CategoricalIndex] index object with added values
    # @example
    #   idx = DaruLite::CategoricalIndex.new [:a, :b, :a, :b, :c]
    #   idx.add :d
    #   # => #<DaruLite::CategoricalIndex(6): {a, b, a, b, c, d}>
    def add(*indexes)
      DaruLite::CategoricalIndex.new(to_a + indexes)
    end

    private

    # Integer code of a category: its rank among the keys of @cat_hash
    def int_from_cat(cat)
      @cat_hash.keys.index cat
    end

    # Category stored under the given integer code
    def cat_from_int(cat)
      @cat_hash.keys[cat]
    end
  end
end
|
@@ -0,0 +1,374 @@
|
|
1
|
+
module DaruLite
  # Ordered collection of unique labels, each mapped to its integer position.
  #
  # @relation_hash maps label => position, @keys holds the labels in order.
  class Index # rubocop:disable Metrics/ClassLength
    include Enumerable
    # It so happens that overriding the .new method in a super class also
    # tampers with the default .new method for classes that inherit from the
    # super class (Index in this case). Thus we first alias the original
    # new method (from Object) to __new__ when the Index class is evaluated,
    # and then we use an inherited hook such that the old new method (from
    # Object) is once again the default .new for the subclass.
    # Refer http://blog.sidu.in/2007/12/rubys-new-as-factory.html
    class << self
      alias __new__ new

      def inherited(subclass)
        # Keep any other `inherited` hooks in the ancestor chain working
        super
        class << subclass
          alias_method :new, :__new__
        end
      end
    end

    # We over-ride the .new method so that any sort of Index can be generated
    # from DaruLite::Index based on the types of arguments supplied.
    def self.new(*args, &block)
      # FIXME: I'm not sure this clever trick really deserves our attention.
      # Most of common ruby libraries just avoid it in favor of usual
      # factory method, like `Index.create`. When `Index.new(...).class != Index`
      # it just leads to confusion and surprises. - zverok, 2016-05-18
      source = args.first

      MultiIndex.try_from_tuples(source) ||
        DateTimeIndex.try_create(source) ||
        allocate.tap { |i| i.send :initialize, *args, &block }
    end

    # Returns the argument unchanged when it already is an Index, otherwise
    # builds a new index from it.
    # @param maybe_index [DaruLite::Index, object] index or index source
    # @return [DaruLite::Index]
    def self.coerce(maybe_index)
      maybe_index.is_a?(Index) ? maybe_index : DaruLite::Index.new(maybe_index)
    end

    # Yields every label in order; returns an Enumerator without a block.
    # @return [Enumerator, DaruLite::Index] enumerator, or self after iterating
    def each(&block)
      return to_enum(:each) unless block

      @relation_hash.each_key(&block)
      self
    end

    attr_reader :relation_hash, :size
    attr_accessor :name

    # @param index [nil, Integer, Enumerable] nil gives an empty index, an
    #   Integer n gives labels 0...n, an Enumerable supplies the labels
    # @param opts [Hash] options; only :name is read
    # @raise [ArgumentError] if index is of any other type
    # @example
    #
    #   idx = DaruLite::Index.new [:one, 'one', 1, 2, :two]
    #   => #<DaruLite::Index(5): {one, one, 1, 2, two}>
    #
    #   # set the name
    #
    #   idx.name = "index_name"
    #   => "index_name"
    #
    #   idx
    #   => #<DaruLite::Index(5): index_name {one, one, 1, 2, two}>
    #
    #   # set the name during initialization
    #
    #   idx = DaruLite::Index.new [:one, 'one', 1, 2, :two], name: "index_name"
    #   => #<DaruLite::Index(5): index_name {one, one, 1, 2, two}>
    def initialize(index, opts = {})
      index = guess_index index
      @relation_hash = index.each_with_index.to_h.freeze
      @keys = @relation_hash.keys
      @size = @relation_hash.size
      @name = opts[:name]
    end

    # Two indexes are equal when they are of the same class and hold the same
    # labels at the same positions. The name is not compared.
    # @param other [object] object to compare with
    # @return [true, false]
    def ==(other)
      return false if self.class != other.class || other.size != @size

      @keys == other.to_a &&
        @relation_hash.values == other.relation_hash.values
    end

    # Looks up positions by label.
    # @param key [Range, object] a label, or a Range of labels
    # @param rest [Array<object>] further labels
    # @return [Integer, Array, nil] position for a single label (nil when
    #   absent), array of positions for several labels or a Range
    def [](key, *rest)
      if key.is_a?(Range)
        by_range key
      elsif !rest.empty?
        by_multi_key key, *rest
      else
        by_single_key key
      end
    end

    # Returns true if all arguments are either a valid category or position
    # @note A numeric argument counts as a position only when it lies within
    #   -size...size, which are the bounds #pos accepts.
    # @param indexes [Array<object>] categories or positions
    # @return [true, false]
    # @example
    #   idx.valid? :a, 2
    #   # => true
    #   idx.valid? 3
    #   # => false
    def valid?(*indexes)
      # to_a may be rebuilt on every call in subclasses, so evaluate it once
      labels = to_a
      indexes.all? do |i|
        labels.include?(i) || (i.is_a?(Numeric) && i < size && i >= -size)
      end
    end

    # Returns positions given indexes or positions
    # @note If the argument is both a valid index and a valid position,
    #   it will be treated as a valid index
    # @param indexes [Array<object>] indexes or positions
    # @return [Integer, Array<Integer>] single position for one argument,
    #   array of positions otherwise
    # @raise [IndexError] if an argument is neither an index nor a position
    # @example
    #   x = DaruLite::Index.new [:a, :b, :c]
    #   x.pos :a, 1
    #   # => [0, 1]
    def pos(*indexes)
      indexes = preprocess_range(indexes.first) if indexes.first.is_a? Range

      if indexes.size == 1
        numeric_pos indexes.first
      else
        indexes.map { |index| numeric_pos index }
      end
    end

    # Builds a new index from a Range, from labels, or from positions.
    # The first argument decides the interpretation: a Range is sliced, a
    # known label means all arguments are labels, anything else means all
    # arguments are positions.
    # @param indexes [Array<object>] range, labels or positions
    # @return [DaruLite::Index]
    def subset(*indexes)
      if indexes.first.is_a? Range
        start = indexes.first.begin
        en = indexes.first.end

        subset_slice start, en
      elsif include? indexes.first
        # Assume 'indexes' contain indexes not positions
        DaruLite::Index.new indexes
      else
        # Assume 'indexes' contain positions not indexes
        DaruLite::Index.new(indexes.map { |k| key k })
      end
    end

    # Takes positional values and returns subset of the self
    # capturing the indexes at mentioned positions
    # @param positions [Array<Integer>] positional values
    # @return [object] the label itself for a single Integer position,
    #   otherwise a new index object
    # @raise [IndexError] if a position is outside the index
    # @example
    #   idx = DaruLite::Index.new [:a, :b, :c]
    #   idx.at 0, 1
    #   # => #<DaruLite::Index(2): {a, b}>
    def at(*positions)
      positions = preprocess_positions(*positions)
      validate_positions(*positions)
      if positions.is_a? Integer
        key(positions)
      else
        self.class.new(positions.map { |v| key(v) })
      end
    end

    # Human readable summary; lists at most `threshold` labels followed by
    # the last one.
    # @param threshold [Integer] maximum number of labels printed in full
    # @return [String]
    def inspect(threshold = 20)
      name_part = @name ? "#{@name} " : ''
      if size <= threshold
        "#<#{self.class}(#{size}): #{name_part}{#{to_a.join(', ')}}>"
      else
        "#<#{self.class}(#{size}): #{name_part}{#{to_a.first(threshold).join(', ')} ... #{to_a.last}}>"
      end
    end

    # Positions between two labels, both inclusive.
    # @param args [Array<object>] start label and end label
    # @return [Array<Integer>, nil] nil when the start label is unknown;
    #   positions up to the end of the index when the end label is unknown
    def slice(*args)
      start = args[0]
      en = args[1]

      start_idx = @relation_hash[start]
      en_idx = @relation_hash[en]

      if start_idx.nil?
        nil
      elsif en_idx.nil?
        Array(start_idx..size - 1)
      else
        Array(start_idx..en_idx)
      end
    end

    # New index holding the labels between two bounds, both inclusive.
    # Two Integer bounds are taken as positions, anything else as labels.
    # @param args [Array<object>] start and end bound
    # @return [DaruLite::Index]
    def subset_slice(*args)
      start = args[0]
      en = args[1]

      if start.is_a?(Integer) && en.is_a?(Integer)
        Index.new @keys[start..en]
      else
        start_idx = @relation_hash[start]
        en_idx = @relation_hash[en]
        Index.new @keys[start_idx..en_idx]
      end
    end

    # Produce new index from the set union of two indexes.
    def |(other)
      Index.new(to_a | other.to_a)
    end

    # Produce a new index from the set intersection of two indexes
    def &(other)
      Index.new(to_a & other.to_a)
    end

    # @return [Array] all labels in positional order
    def to_a
      @keys
    end

    # Label stored at a position.
    # @param value [Numeric] position
    # @return [object, nil] the label; nil when value is not Numeric
    def key(value)
      return nil unless value.is_a?(Numeric)

      @keys[value]
    end

    # @param index [object] label to look for
    # @return [true, false] true if the label is part of the index
    def include?(index)
      @relation_hash.key? index
    end

    # @note Do not use it to check for Float::NAN as
    #   Float::NAN == Float::NAN is false
    # Return vector of booleans with value at ith position is either
    # true or false depending upon whether index value at position i is equal to
    # any of the values passed in the argument or not
    # @param indexes [Array] values to equate with
    # @return [DaruLite::Vector] vector of boolean values
    # @example
    #   dv = DaruLite::Index.new [1, 2, 3, :one, 'one']
    #   dv.is_values 1, 'one'
    #   # => #<DaruLite::Vector(5)>
    #   #     0  true
    #   #     1  false
    #   #     2  false
    #   #     3  false
    #   #     4  true
    def is_values(*indexes) # rubocop:disable Naming/PredicateName
      bool_array = @keys.map { |r| indexes.include?(r) }
      DaruLite::Vector.new(bool_array)
    end

    # @return [true, false] true if the index holds no labels
    def empty?
      @size.zero?
    end

    # @return [DaruLite::Index] new index with the same labels and name
    def dup
      DaruLite::Index.new @keys, name: @name
    end

    # @param indexes [Array<object>] labels to append
    # @return [DaruLite::Index] new index with the labels appended
    def add(*indexes)
      DaruLite::Index.new(to_a + indexes)
    end

    # Marshal hook: serialises the label table together with the name so
    # that a round trip does not drop the name.
    def _dump(*)
      Marshal.dump(relation_hash: @relation_hash, name: @name)
    end

    # Marshal hook: rebuilds the index from the payload written by #_dump.
    # Payloads written without a :name entry load with a nil name.
    # NOTE: Marshal data must only ever come from a trusted source.
    def self._load(data)
      h = Marshal.load data

      DaruLite::Index.new(h[:relation_hash].keys, name: h[:name])
    end

    # Provide an Index for sub vector produced
    #
    # @option * [Array] the input by user to index the vector
    # @return [Object] the Index object for sub vector produced
    def conform(*)
      self
    end

    # New index whose labels are picked from self in the given order.
    # @param new_order [Array<Integer>] positions in the desired order
    # @return [DaruLite::Index]
    def reorder(new_order)
      from = to_a
      self.class.new(new_order.map { |i| from[i] })
    end

    # Sorts a `Index`, according to its values. Defaults to ascending order
    # sorting.
    #
    # @param [Hash] opts the options for sort method.
    # @option opts [Boolean] :ascending False, to get descending order.
    #
    # @return [Index] sorted `Index` according to its values.
    #
    # @example
    #   di = DaruLite::Index.new [100, 99, 101, 1, 2]
    #   # Say you want to sort in descending order
    #   di.sort(ascending: false) #=> DaruLite::Index.new [101, 100, 99, 2, 1]
    #   # Say you want to sort in ascending order
    #   di.sort #=> DaruLite::Index.new [1, 2, 99, 100, 101]
    def sort(opts = {})
      opts = { ascending: true }.merge(opts)

      new_index = @keys.sort
      new_index = new_index.reverse unless opts[:ascending]

      self.class.new(new_index)
    end

    # @return [DaruLite::DataFrame] data frame with one vector, keyed by the
    #   index name and holding the labels
    def to_df
      DaruLite::DataFrame.new(name => to_a)
    end

    private

    # Normalises the constructor argument into an array of labels.
    def guess_index(index)
      case index
      when nil
        []
      when Integer
        index.times.to_a
      when Enumerable
        index.to_a
      else
        raise ArgumentError,
              "Cannot create index from #{index.class} #{index.inspect}"
      end
    end

    # Labels covered by a range; Integer bounds are positions, any other
    # bounds are labels. Both ends are treated as inclusive.
    def preprocess_range(rng)
      start = rng.begin
      en = rng.end

      if start.is_a?(Integer) && en.is_a?(Integer)
        @keys[start..en]
      else
        start_idx = @relation_hash[start]
        en_idx = @relation_hash[en]

        @keys[start_idx..en_idx]
      end
    end

    # Positions for a Range of labels
    def by_range(rng)
      slice rng.begin, rng.end
    end

    # Positions for several labels (nil for every unknown label)
    def by_multi_key(*key)
      key.map { |k| by_single_key k }
    end

    # Position of one label, nil when the label is unknown
    def by_single_key(key)
      @relation_hash[key] if @relation_hash.key?(key)
    end

    # Raises IndexError when one of the positions is an invalid position
    def validate_positions(*positions)
      positions.each do |pos|
        raise IndexError, "#{pos} is not a valid position." if pos >= size || pos < -size
      end
    end

    # Preprocess ranges, integers and array in appropriate ways
    def preprocess_positions(*positions)
      if positions.size == 1
        case positions.first
        when Integer
          positions.first
        when Range
          size.times.to_a[positions.first]
        else
          raise ArgumentError, 'Unkown position type.'
        end
      else
        positions
      end
    end

    # Position for a label, or the argument itself when it is a number
    # inside -size...size; raises IndexError otherwise.
    def numeric_pos(key)
      if @relation_hash.key?(key)
        @relation_hash[key]
      elsif key.is_a?(Numeric) && (key < size && key >= -size)
        key
      else
        raise IndexError, "Specified index #{key.inspect} does not exist"
      end
    end
  end
end
|