daru_lite 0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.github/ISSUE_TEMPLATE.md +18 -0
- data/.github/workflows/ci.yml +33 -0
- data/.gitignore +10 -0
- data/.rspec +2 -0
- data/.rubocop.yml +27 -0
- data/.rubocop_todo.yml +137 -0
- data/CONTRIBUTING.md +47 -0
- data/Gemfile +2 -0
- data/History.md +4 -0
- data/LICENSE +24 -0
- data/README.md +218 -0
- data/Rakefile +69 -0
- data/ReleasePolicy.md +20 -0
- data/benchmarks/TradeoffData.csv +65 -0
- data/benchmarks/csv_reading.rb +22 -0
- data/benchmarks/dataframe_creation.rb +39 -0
- data/benchmarks/db_loading.rb +34 -0
- data/benchmarks/duplicating.rb +45 -0
- data/benchmarks/group_by.rb +32 -0
- data/benchmarks/joining.rb +52 -0
- data/benchmarks/row_access.rb +41 -0
- data/benchmarks/row_assign.rb +36 -0
- data/benchmarks/sorting.rb +51 -0
- data/benchmarks/statistics.rb +28 -0
- data/benchmarks/vector_access.rb +31 -0
- data/benchmarks/vector_assign.rb +42 -0
- data/benchmarks/where_clause.rb +48 -0
- data/benchmarks/where_vs_filter.rb +28 -0
- data/daru_lite.gemspec +55 -0
- data/images/README.md +5 -0
- data/images/con0.png +0 -0
- data/images/con1.png +0 -0
- data/images/init0.png +0 -0
- data/images/init1.png +0 -0
- data/images/man0.png +0 -0
- data/images/man1.png +0 -0
- data/images/man2.png +0 -0
- data/images/man3.png +0 -0
- data/images/man4.png +0 -0
- data/images/man5.png +0 -0
- data/images/man6.png +0 -0
- data/lib/daru_lite/accessors/array_wrapper.rb +109 -0
- data/lib/daru_lite/accessors/dataframe_by_row.rb +25 -0
- data/lib/daru_lite/accessors/mdarray_wrapper.rb +7 -0
- data/lib/daru_lite/category.rb +929 -0
- data/lib/daru_lite/configuration.rb +34 -0
- data/lib/daru_lite/core/group_by.rb +403 -0
- data/lib/daru_lite/core/merge.rb +270 -0
- data/lib/daru_lite/core/query.rb +109 -0
- data/lib/daru_lite/dataframe.rb +3080 -0
- data/lib/daru_lite/date_time/index.rb +569 -0
- data/lib/daru_lite/date_time/offsets.rb +397 -0
- data/lib/daru_lite/exceptions.rb +2 -0
- data/lib/daru_lite/extensions/which_dsl.rb +53 -0
- data/lib/daru_lite/formatters/table.rb +52 -0
- data/lib/daru_lite/helpers/array.rb +53 -0
- data/lib/daru_lite/index/categorical_index.rb +201 -0
- data/lib/daru_lite/index/index.rb +374 -0
- data/lib/daru_lite/index/multi_index.rb +374 -0
- data/lib/daru_lite/io/csv/converters.rb +21 -0
- data/lib/daru_lite/io/io.rb +294 -0
- data/lib/daru_lite/io/sql_data_source.rb +97 -0
- data/lib/daru_lite/iruby/helpers.rb +38 -0
- data/lib/daru_lite/iruby/templates/dataframe.html.erb +5 -0
- data/lib/daru_lite/iruby/templates/dataframe_mi.html.erb +5 -0
- data/lib/daru_lite/iruby/templates/dataframe_mi_tbody.html.erb +35 -0
- data/lib/daru_lite/iruby/templates/dataframe_mi_thead.html.erb +21 -0
- data/lib/daru_lite/iruby/templates/dataframe_tbody.html.erb +28 -0
- data/lib/daru_lite/iruby/templates/dataframe_thead.html.erb +21 -0
- data/lib/daru_lite/iruby/templates/multi_index.html.erb +12 -0
- data/lib/daru_lite/iruby/templates/vector.html.erb +5 -0
- data/lib/daru_lite/iruby/templates/vector_mi.html.erb +5 -0
- data/lib/daru_lite/iruby/templates/vector_mi_tbody.html.erb +26 -0
- data/lib/daru_lite/iruby/templates/vector_mi_thead.html.erb +8 -0
- data/lib/daru_lite/iruby/templates/vector_tbody.html.erb +17 -0
- data/lib/daru_lite/iruby/templates/vector_thead.html.erb +8 -0
- data/lib/daru_lite/maths/arithmetic/dataframe.rb +91 -0
- data/lib/daru_lite/maths/arithmetic/vector.rb +117 -0
- data/lib/daru_lite/maths/statistics/dataframe.rb +202 -0
- data/lib/daru_lite/maths/statistics/vector.rb +1019 -0
- data/lib/daru_lite/monkeys.rb +56 -0
- data/lib/daru_lite/vector.rb +1678 -0
- data/lib/daru_lite/version.rb +3 -0
- data/lib/daru_lite.rb +99 -0
- data/profile/_base.rb +23 -0
- data/profile/df_to_a.rb +10 -0
- data/profile/filter.rb +13 -0
- data/profile/joining.rb +13 -0
- data/profile/sorting.rb +12 -0
- data/profile/vector_each_with_index.rb +9 -0
- data/profile/vector_new.rb +9 -0
- data/spec/accessors/array_wrapper_spec.rb +3 -0
- data/spec/category_spec.rb +1741 -0
- data/spec/core/group_by_spec.rb +655 -0
- data/spec/core/merge_spec.rb +179 -0
- data/spec/core/query_spec.rb +347 -0
- data/spec/daru_lite_spec.rb +22 -0
- data/spec/dataframe_spec.rb +4330 -0
- data/spec/date_time/data_spec.rb +197 -0
- data/spec/date_time/date_time_index_helper_spec.rb +72 -0
- data/spec/date_time/index_spec.rb +588 -0
- data/spec/date_time/offsets_spec.rb +465 -0
- data/spec/extensions/which_dsl_spec.rb +38 -0
- data/spec/fixtures/bank2.dat +200 -0
- data/spec/fixtures/boolean_converter_test.csv +5 -0
- data/spec/fixtures/countries.json +7794 -0
- data/spec/fixtures/duplicates.csv +32 -0
- data/spec/fixtures/eciresults.html +394 -0
- data/spec/fixtures/empties.dat +2 -0
- data/spec/fixtures/empty_rows_test.csv +17 -0
- data/spec/fixtures/macau.html +3691 -0
- data/spec/fixtures/macd_data.csv +150 -0
- data/spec/fixtures/matrix_test.csv +100 -0
- data/spec/fixtures/moneycontrol.html +6812 -0
- data/spec/fixtures/music_data.tsv +2501 -0
- data/spec/fixtures/repeated_fields.csv +7 -0
- data/spec/fixtures/sales-funnel.csv +18 -0
- data/spec/fixtures/scientific_notation.csv +4 -0
- data/spec/fixtures/string_converter_test.csv +5 -0
- data/spec/fixtures/strings.dat +2 -0
- data/spec/fixtures/test_xls.xls +0 -0
- data/spec/fixtures/test_xls_2.xls +0 -0
- data/spec/fixtures/url_test.txt~ +0 -0
- data/spec/fixtures/valid_markup.html +62 -0
- data/spec/fixtures/wiki_climate.html +1243 -0
- data/spec/fixtures/wiki_table_info.html +631 -0
- data/spec/formatters/table_formatter_spec.rb +137 -0
- data/spec/helpers_spec.rb +8 -0
- data/spec/index/categorical_index_spec.rb +170 -0
- data/spec/index/index_spec.rb +417 -0
- data/spec/index/multi_index_spec.rb +680 -0
- data/spec/io/io_spec.rb +373 -0
- data/spec/io/sql_data_source_spec.rb +56 -0
- data/spec/iruby/dataframe_spec.rb +170 -0
- data/spec/iruby/helpers_spec.rb +49 -0
- data/spec/iruby/multi_index_spec.rb +37 -0
- data/spec/iruby/vector_spec.rb +105 -0
- data/spec/maths/arithmetic/dataframe_spec.rb +148 -0
- data/spec/maths/arithmetic/vector_spec.rb +165 -0
- data/spec/maths/statistics/dataframe_spec.rb +178 -0
- data/spec/maths/statistics/vector_spec.rb +756 -0
- data/spec/monkeys_spec.rb +42 -0
- data/spec/shared/vector_display_spec.rb +213 -0
- data/spec/spec_helper.rb +87 -0
- data/spec/support/database_helper.rb +30 -0
- data/spec/support/matchers.rb +5 -0
- data/spec/vector_spec.rb +2293 -0
- metadata +571 -0
|
@@ -0,0 +1,201 @@
|
|
|
1
|
+
module DaruLite
  # Index whose values (categories) may repeat. Internally it keeps a hash
  # mapping every category to the positions at which it occurs (@cat_hash)
  # and an array holding one integer code per entry (@array), where the code
  # is the ordinal of the entry's category among the hash keys.
  class CategoricalIndex < Index
    # Create a categorical index object.
    # @param indexes [Array<object>] array of indexes
    # @return [DaruLite::CategoricalIndex] categorical index
    # @example
    #   DaruLite::CategoricalIndex.new [:a, 1, :a, 1, :c]
    #   # => #<DaruLite::CategoricalIndex(5): {a, 1, a, 1, c}>
    def initialize(indexes)
      # Create a hash to map each category to positional indexes
      categories = indexes.each_with_index.group_by(&:first)
      @cat_hash = categories.transform_values { |group| group.map(&:last) }

      # Map each category to a unique integer for effective storage in @array
      map_cat_int = categories.keys.each_with_index.to_h

      # To link every instance to its category, store one integer per
      # instance representing its category. A plain map is used instead of
      # splatting the whole input into a single method call, which can
      # exhaust the stack for very large inputs.
      @array = indexes.map { |index| map_cat_int[index] }
    end

    # Duplicates the index object and returns it
    # @return [DaruLite::CategoricalIndex] duplicated index object
    def dup
      # Could be improved by initializing the copy directly from the hash
      DaruLite::CategoricalIndex.new to_a
    end

    # Returns true if the given value is one of the categories
    # @param index [object] the index value to look for
    # @return [true, false] true if index is included, false otherwise
    def include?(index)
      @cat_hash.include? index
    end

    # Returns array of categories
    # @return [Array<object>] distinct categories in order of first occurrence
    # @example
    #   x = DaruLite::CategoricalIndex.new [:a, 1, :a, 1, :c]
    #   x.categories
    #   # => [:a, 1, :c]
    def categories
      @cat_hash.keys
    end

    # Returns positions given categories or positions
    # @note If the argument is not a valid category it is treated as a
    #   position value and returned as it is, provided it lies within
    #   -size...size.
    # @param indexes [Array<object>] categories or positions
    # @return [Integer, Array<Integer>] a single position when exactly one
    #   position results, otherwise the sorted array of positions
    # @raise [IndexError] if an argument is neither a category nor a position
    #   inside the index
    # @example
    #   x = DaruLite::CategoricalIndex.new [:a, 1, :a, 1, :c]
    #   x.pos :a, 1
    #   # => [0, 1, 2, 3]
    def pos(*indexes)
      entries = @array.size

      positions = indexes.flat_map do |index|
        if include? index
          @cat_hash[index]
        elsif index.is_a?(Numeric) && index < entries && index >= -entries
          # Same bounds as Index#numeric_pos: negative positions count from
          # the end but must still fall inside the index
          index
        else
          raise IndexError, "#{index.inspect} is neither a valid category nor a valid position"
        end
      end

      positions.size == 1 ? positions.first : positions.sort
    end

    # Returns index value from position
    # @param pos [Integer] the position to look for
    # @return [object] category corresponding to position
    # @example
    #   idx = DaruLite::CategoricalIndex.new [:a, :b, :a, :b, :c]
    #   idx.index_from_pos 1
    #   # => :b
    def index_from_pos(pos)
      cat_from_int @array[pos]
    end

    # Returns enumerator enumerating all index values in the order they occur
    # @return [Enumerator] all index values (when no block is given)
    # @example
    #   idx = DaruLite::CategoricalIndex.new [:a, :a, :b]
    #   idx.each.to_a
    #   # => [:a, :a, :b]
    def each
      return enum_for(:each) unless block_given?

      # Build the category list once instead of once per yielded entry
      cats = categories
      @array.each { |code| yield cats[code] }
      self
    end

    # Compares two index objects. Returns true if every instance of category
    # occurs at the same position
    # @param other [DaruLite::CategoricalIndex] index object to be checked against
    # @return [true, false] true if other is similar to self
    # @example
    #   a = DaruLite::CategoricalIndex.new [:a, :a, :b]
    #   b = DaruLite::CategoricalIndex.new [:b, :a, :a]
    #   a == b
    #   # => false
    def ==(other)
      self.class == other.class &&
        size == other.size &&
        to_h == other.to_h
    end

    # Returns all the index values
    # @return [Array] all index values
    # @example
    #   idx = DaruLite::CategoricalIndex.new [:a, :b, :a]
    #   idx.to_a
    #   # => [:a, :b, :a]
    def to_a
      each.to_a
    end

    # Returns hash table mapping category to positions at which they occur
    # @return [Hash] hash table mapping category to array of positions
    # @example
    #   idx = DaruLite::CategoricalIndex.new [:a, :b, :a]
    #   idx.to_h
    #   # => {:a=>[0, 2], :b=>[1]}
    def to_h
      @cat_hash
    end

    # Returns size of the index object
    # @return [Integer] total number of instances of all categories
    # @example
    #   idx = DaruLite::CategoricalIndex.new [:a, :b, :a]
    #   idx.size
    #   # => 3
    def size
      @array.size
    end

    # Returns true if index object is storing no category
    # @return [true, false] true if index object is empty
    # @example
    #   i = DaruLite::CategoricalIndex.new []
    #   # => #<DaruLite::CategoricalIndex(0): {}>
    #   i.empty?
    #   # => true
    def empty?
      @array.empty?
    end

    # Return subset given categories or positions
    # @param indexes [Array<object>] categories or positions
    # @return [DaruLite::CategoricalIndex] subset of the self containing the
    #   mentioned categories or positions
    # @example
    #   idx = DaruLite::CategoricalIndex.new [:a, :b, :a, :b, :c]
    #   idx.subset :a, :b
    #   # => #<DaruLite::CategoricalIndex(4): {a, b, a, b}>
    def subset(*indexes)
      # #pos returns a bare Integer when exactly one position matches, so
      # coerce to an array before iterating
      positions = Array(pos(*indexes))
      new_index = positions.map { |position| index_from_pos position }

      DaruLite::CategoricalIndex.new new_index.flatten
    end

    # Takes positional values and returns subset of the self
    # capturing the categories at mentioned positions
    # @param positions [Array<Integer>] positional values
    # @return [object] the category itself for a single Integer position,
    #   otherwise a new DaruLite::CategoricalIndex
    # @example
    #   idx = DaruLite::CategoricalIndex.new [:a, :b, :a, :b, :c]
    #   idx.at 0, 1
    #   # => #<DaruLite::CategoricalIndex(2): {a, b}>
    def at(*positions)
      positions = preprocess_positions(*positions)
      validate_positions(*positions)
      if positions.is_a? Integer
        index_from_pos(positions)
      else
        DaruLite::CategoricalIndex.new(positions.map { |p| index_from_pos(p) })
      end
    end

    # Add specified index values to the index object
    # @param indexes [Array<object>] index values to add
    # @return [DaruLite::CategoricalIndex] index object with added values
    # @example
    #   idx = DaruLite::CategoricalIndex.new [:a, :b, :a, :b, :c]
    #   idx.add :d
    #   # => #<DaruLite::CategoricalIndex(6): {a, b, a, b, c, d}>
    def add(*indexes)
      DaruLite::CategoricalIndex.new(to_a + indexes)
    end

    private

    # Integer code of a category: its ordinal among the hash keys
    def int_from_cat(cat)
      @cat_hash.keys.index cat
    end

    # Category stored under the given integer code
    def cat_from_int(cat)
      @cat_hash.keys[cat]
    end
  end
end
|
|
@@ -0,0 +1,374 @@
|
|
|
1
|
+
module DaruLite
  # Ordered collection of unique labels. Every label is mapped to its
  # position through a frozen hash (@relation_hash); @keys holds the labels
  # in positional order.
  class Index # rubocop:disable Metrics/ClassLength
    include Enumerable
    # It so happens that overriding the .new method in a super class also
    # tampers with the default .new method for classes that inherit from the
    # super class (Index in this case). Thus we first alias the original
    # new method (from Object) to __new__ when the Index class is evaluated,
    # and then we use an inherited hook such that the old new method (from
    # Object) is once again the default .new for the subclass.
    # Refer http://blog.sidu.in/2007/12/rubys-new-as-factory.html
    class << self
      alias __new__ new

      def inherited(subclass)
        class << subclass
          alias_method :new, :__new__
        end
      end
    end

    # We override the .new method so that any sort of Index can be generated
    # from DaruLite::Index based on the types of arguments supplied.
    def self.new(*args, &block)
      # FIXME: I'm not sure this clever trick really deserves our attention.
      # Most of common ruby libraries just avoid it in favor of usual
      # factory method, like `Index.create`. When `Index.new(...).class != Index`
      # it just leads to confusion and surprises. - zverok, 2016-05-18
      source = args.first

      MultiIndex.try_from_tuples(source) ||
        DateTimeIndex.try_create(source) ||
        allocate.tap { |i| i.send :initialize, *args, &block }
    end

    # Returns the argument unchanged when it already is an Index, otherwise
    # builds a new index from it.
    def self.coerce(maybe_index)
      maybe_index.is_a?(Index) ? maybe_index : DaruLite::Index.new(maybe_index)
    end

    # Yields every label in positional order.
    # @return [Enumerator, self] an enumerator when no block is given
    def each(&block)
      return to_enum(:each) unless block

      @relation_hash.each_key(&block)
      self
    end

    attr_reader :relation_hash, :size
    attr_accessor :name

    # @param index [nil, Integer, Enumerable] nil gives an empty index, an
    #   Integer n gives the labels 0...n, an Enumerable supplies the labels
    # @param opts [Hash] options; :name sets the name of the index
    # @raise [ArgumentError] if the index cannot be built from the argument
    # @example
    #
    #   idx = DaruLite::Index.new [:one, 'one', 1, 2, :two]
    #   # => #<DaruLite::Index(5): {one, one, 1, 2, two}>
    #
    #   # set the name
    #
    #   idx.name = "index_name"
    #   # => "index_name"
    #
    #   idx
    #   # => #<DaruLite::Index(5): index_name {one, one, 1, 2, two}>
    #
    #   # set the name during initialization
    #
    #   idx = DaruLite::Index.new [:one, 'one', 1, 2, :two], name: "index_name"
    #   # => #<DaruLite::Index(5): index_name {one, one, 1, 2, two}>
    def initialize(index, opts = {})
      index = guess_index index
      @relation_hash = index.each_with_index.to_h.freeze
      @keys = @relation_hash.keys
      @size = @relation_hash.size
      @name = opts[:name]
    end

    # Two indexes are equal when they are of the same class and hold the
    # same labels at the same positions.
    def ==(other)
      return false if self.class != other.class || other.size != @size

      @keys == other.to_a &&
        @relation_hash.values == other.relation_hash.values
    end

    # Looks up positions by label.
    # @return [Integer, Array, nil] for a Range, the positions between its
    #   begin and end labels (see #slice); for several labels, one position
    #   (or nil) per label; for a single label, its position or nil
    def [](key, *rest)
      if key.is_a?(Range)
        by_range key
      elsif !rest.empty?
        by_multi_key key, *rest
      else
        by_single_key key
      end
    end

    # Returns true if all arguments are either a valid category or position
    # @param indexes [Array<object>] categories or positions
    # @return [true, false]
    # @example
    #   idx.valid? :a, 2
    #   # => true
    #   idx.valid? 3
    #   # => false
    def valid?(*indexes)
      labels = to_a
      indexes.all? do |i|
        # Positions are bounded on both sides, matching what #pos accepts;
        # a negative position below -size is not valid
        labels.include?(i) || (i.is_a?(Numeric) && i < size && i >= -size)
      end
    end

    # Returns positions given indexes or positions
    # @note If the argument is both a valid index and a valid position,
    #   it will be treated as a valid index
    # @param indexes [Array<object>] indexes or positions
    # @return [Integer, Array<Integer>] a single position for one argument,
    #   otherwise one position per argument
    # @raise [IndexError] if an argument is neither a label nor a position
    #   inside the index
    # @example
    #   x = DaruLite::Index.new [:a, :b, :c]
    #   x.pos :a, 1
    #   # => [0, 1]
    def pos(*indexes)
      indexes = preprocess_range(indexes.first) if indexes.first.is_a? Range

      if indexes.size == 1
        numeric_pos indexes.first
      else
        indexes.map { |index| numeric_pos index }
      end
    end

    # Returns a new index built from the given range, labels or positions.
    # Whether the arguments are labels or positions is decided from the
    # first argument only.
    def subset(*indexes)
      if indexes.first.is_a? Range
        start = indexes.first.begin
        en = indexes.first.end

        subset_slice start, en
      elsif include? indexes.first
        # Assume 'indexes' contain indexes not positions
        DaruLite::Index.new indexes
      else
        # Assume 'indexes' contain positions not indexes
        DaruLite::Index.new(indexes.map { |k| key k })
      end
    end

    # Takes positional values and returns subset of the self
    # capturing the indexes at mentioned positions
    # @param positions [Array<Integer>] positional values
    # @return [object] the label itself for a single Integer position,
    #   otherwise a new index object
    # @raise [IndexError] if a position lies outside the index
    # @example
    #   idx = DaruLite::Index.new [:a, :b, :c]
    #   idx.at 0, 1
    #   # => #<DaruLite::Index(2): {a, b}>
    def at(*positions)
      positions = preprocess_positions(*positions)
      validate_positions(*positions)
      if positions.is_a? Integer
        key(positions)
      else
        self.class.new(positions.map { |v| key(v) })
      end
    end

    # Human readable representation. When the index holds more than
    # `threshold` labels, only the first `threshold` and the last are shown.
    def inspect(threshold = 20)
      name_part = @name ? "#{@name} " : ''
      if size <= threshold
        "#<#{self.class}(#{size}): #{name_part}{#{to_a.join(', ')}}>"
      else
        "#<#{self.class}(#{size}): #{name_part}{#{to_a.first(threshold).join(', ')} ... #{to_a.last}}>"
      end
    end

    # Returns the positions lying between two labels (both inclusive).
    # @return [Array<Integer>, nil] nil when the start label is unknown;
    #   positions up to the last one when the end label is unknown
    def slice(*args)
      start = args[0]
      en = args[1]

      start_idx = @relation_hash[start]
      en_idx = @relation_hash[en]

      if start_idx.nil?
        nil
      elsif en_idx.nil?
        Array(start_idx..size - 1)
      else
        Array(start_idx..en_idx)
      end
    end

    # Returns a new Index with the labels between two bounds (inclusive).
    # Two Integer bounds are used as positions; otherwise the bounds are
    # looked up as labels.
    def subset_slice(*args)
      start = args[0]
      en = args[1]

      if start.is_a?(Integer) && en.is_a?(Integer)
        Index.new @keys[start..en]
      else
        start_idx = @relation_hash[start]
        en_idx = @relation_hash[en]
        Index.new @keys[start_idx..en_idx]
      end
    end

    # Produce new index from the set union of two indexes.
    def |(other)
      Index.new(to_a | other.to_a)
    end

    # Produce a new index from the set intersection of two indexes
    def &(other)
      Index.new(to_a & other.to_a)
    end

    # @return [Array] all labels in positional order
    def to_a
      @keys
    end

    # Returns the label stored at a position.
    # @return [object, nil] nil when the argument is not Numeric
    def key(value)
      return nil unless value.is_a?(Numeric)

      @keys[value]
    end

    # @return [true, false] true if the given label is part of the index
    def include?(index)
      @relation_hash.key? index
    end

    # @note Do not use it to check for Float::NAN as
    #   Float::NAN == Float::NAN is false
    # Return vector of booleans with value at ith position is either
    # true or false depending upon whether index value at position i is equal to
    # any of the values passed in the argument or not
    # @param indexes [Array] values to equate with
    # @return [DaruLite::Vector] vector of boolean values
    # @example
    #   dv = DaruLite::Index.new [1, 2, 3, :one, 'one']
    #   dv.is_values 1, 'one'
    #   # => #<DaruLite::Vector(5)>
    #   #     0  true
    #   #     1  false
    #   #     2  false
    #   #     3  false
    #   #     4  true
    def is_values(*indexes) # rubocop:disable Naming/PredicateName
      bool_array = @keys.map { |r| indexes.include?(r) }
      DaruLite::Vector.new(bool_array)
    end

    # @return [true, false] true if the index holds no labels
    def empty?
      @size.zero?
    end

    # @return [DaruLite::Index] copy with the same labels and name
    def dup
      DaruLite::Index.new @keys, name: @name
    end

    # @param indexes [Array<object>] labels to append
    # @return [DaruLite::Index] new index with the labels appended
    def add(*indexes)
      DaruLite::Index.new(to_a + indexes)
    end

    # Marshal hook: serializes the label-to-position hash together with the
    # name, so that a dump/load round trip preserves both.
    def _dump(*)
      Marshal.dump(relation_hash: @relation_hash, name: @name)
    end

    # Marshal hook: rebuilds the index from the output of #_dump. Payloads
    # without a :name entry load with a nil name.
    def self._load(data)
      h = Marshal.load data

      DaruLite::Index.new(h[:relation_hash].keys, name: h[:name])
    end

    # Provide an Index for sub vector produced
    #
    # @option * [Array] the input by user to index the vector
    # @return [Object] the Index object for sub vector produced
    def conform(*)
      self
    end

    # Returns a new index whose labels are taken from the given positions,
    # in the given order.
    # @param new_order [Array<Integer>] positions in the desired order
    def reorder(new_order)
      from = to_a
      self.class.new(new_order.map { |i| from[i] })
    end

    # Sorts a `Index`, according to its values. Defaults to ascending order
    # sorting.
    #
    # @param [Hash] opts the options for sort method.
    # @option opts [Boolean] :ascending False, to get descending order.
    #
    # @return [Index] sorted `Index` according to its values.
    #
    # @example
    #   di = DaruLite::Index.new [100, 99, 101, 1, 2]
    #   # Say you want to sort in descending order
    #   di.sort(ascending: false) #=> DaruLite::Index.new [101, 100, 99, 2, 1]
    #   # Say you want to sort in ascending order
    #   di.sort #=> DaruLite::Index.new [1, 2, 99, 100, 101]
    def sort(opts = {})
      opts = { ascending: true }.merge(opts)

      new_index = @keys.sort
      new_index = new_index.reverse unless opts[:ascending]

      self.class.new(new_index)
    end

    # @return [DaruLite::DataFrame] data frame with a single vector, keyed
    #   by the index name, holding the labels
    def to_df
      DaruLite::DataFrame.new(name => to_a)
    end

    private

    # Converts the constructor argument into an array of labels.
    def guess_index(index)
      case index
      when nil
        []
      when Integer
        index.times.to_a
      when Enumerable
        index.to_a
      else
        raise ArgumentError,
              "Cannot create index from #{index.class} #{index.inspect}"
      end
    end

    # Returns the labels covered by a range. Two Integer bounds are used as
    # positions; otherwise the bounds are looked up as labels.
    def preprocess_range(rng)
      start = rng.begin
      en = rng.end

      if start.is_a?(Integer) && en.is_a?(Integer)
        @keys[start..en]
      else
        start_idx = @relation_hash[start]
        en_idx = @relation_hash[en]

        @keys[start_idx..en_idx]
      end
    end

    def by_range(rng)
      slice rng.begin, rng.end
    end

    def by_multi_key(*key)
      key.map { |k| by_single_key k }
    end

    # Position of the label, or nil when the label is unknown
    def by_single_key(key)
      @relation_hash[key] if @relation_hash.key?(key)
    end

    # Raises IndexError when one of the positions is an invalid position
    def validate_positions(*positions)
      positions.each do |pos|
        raise IndexError, "#{pos} is not a valid position." if pos >= size || pos < -size
      end
    end

    # Preprocess ranges, integers and array in appropriate ways
    def preprocess_positions(*positions)
      if positions.size == 1
        case positions.first
        when Integer
          positions.first
        when Range
          size.times.to_a[positions.first]
        else
          raise ArgumentError, 'Unkown position type.'
        end
      else
        positions
      end
    end

    # Resolves a label or an in-range position to a position; labels take
    # precedence over positions.
    def numeric_pos(key)
      if @relation_hash.key?(key)
        @relation_hash[key]
      elsif key.is_a?(Numeric) && (key < size && key >= -size)
        key
      else
        raise IndexError, "Specified index #{key.inspect} does not exist"
      end
    end
  end
end
|