daru_lite 0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.github/ISSUE_TEMPLATE.md +18 -0
- data/.github/workflows/ci.yml +33 -0
- data/.gitignore +10 -0
- data/.rspec +2 -0
- data/.rubocop.yml +27 -0
- data/.rubocop_todo.yml +137 -0
- data/CONTRIBUTING.md +47 -0
- data/Gemfile +2 -0
- data/History.md +4 -0
- data/LICENSE +24 -0
- data/README.md +218 -0
- data/Rakefile +69 -0
- data/ReleasePolicy.md +20 -0
- data/benchmarks/TradeoffData.csv +65 -0
- data/benchmarks/csv_reading.rb +22 -0
- data/benchmarks/dataframe_creation.rb +39 -0
- data/benchmarks/db_loading.rb +34 -0
- data/benchmarks/duplicating.rb +45 -0
- data/benchmarks/group_by.rb +32 -0
- data/benchmarks/joining.rb +52 -0
- data/benchmarks/row_access.rb +41 -0
- data/benchmarks/row_assign.rb +36 -0
- data/benchmarks/sorting.rb +51 -0
- data/benchmarks/statistics.rb +28 -0
- data/benchmarks/vector_access.rb +31 -0
- data/benchmarks/vector_assign.rb +42 -0
- data/benchmarks/where_clause.rb +48 -0
- data/benchmarks/where_vs_filter.rb +28 -0
- data/daru_lite.gemspec +55 -0
- data/images/README.md +5 -0
- data/images/con0.png +0 -0
- data/images/con1.png +0 -0
- data/images/init0.png +0 -0
- data/images/init1.png +0 -0
- data/images/man0.png +0 -0
- data/images/man1.png +0 -0
- data/images/man2.png +0 -0
- data/images/man3.png +0 -0
- data/images/man4.png +0 -0
- data/images/man5.png +0 -0
- data/images/man6.png +0 -0
- data/lib/daru_lite/accessors/array_wrapper.rb +109 -0
- data/lib/daru_lite/accessors/dataframe_by_row.rb +25 -0
- data/lib/daru_lite/accessors/mdarray_wrapper.rb +7 -0
- data/lib/daru_lite/category.rb +929 -0
- data/lib/daru_lite/configuration.rb +34 -0
- data/lib/daru_lite/core/group_by.rb +403 -0
- data/lib/daru_lite/core/merge.rb +270 -0
- data/lib/daru_lite/core/query.rb +109 -0
- data/lib/daru_lite/dataframe.rb +3080 -0
- data/lib/daru_lite/date_time/index.rb +569 -0
- data/lib/daru_lite/date_time/offsets.rb +397 -0
- data/lib/daru_lite/exceptions.rb +2 -0
- data/lib/daru_lite/extensions/which_dsl.rb +53 -0
- data/lib/daru_lite/formatters/table.rb +52 -0
- data/lib/daru_lite/helpers/array.rb +53 -0
- data/lib/daru_lite/index/categorical_index.rb +201 -0
- data/lib/daru_lite/index/index.rb +374 -0
- data/lib/daru_lite/index/multi_index.rb +374 -0
- data/lib/daru_lite/io/csv/converters.rb +21 -0
- data/lib/daru_lite/io/io.rb +294 -0
- data/lib/daru_lite/io/sql_data_source.rb +97 -0
- data/lib/daru_lite/iruby/helpers.rb +38 -0
- data/lib/daru_lite/iruby/templates/dataframe.html.erb +5 -0
- data/lib/daru_lite/iruby/templates/dataframe_mi.html.erb +5 -0
- data/lib/daru_lite/iruby/templates/dataframe_mi_tbody.html.erb +35 -0
- data/lib/daru_lite/iruby/templates/dataframe_mi_thead.html.erb +21 -0
- data/lib/daru_lite/iruby/templates/dataframe_tbody.html.erb +28 -0
- data/lib/daru_lite/iruby/templates/dataframe_thead.html.erb +21 -0
- data/lib/daru_lite/iruby/templates/multi_index.html.erb +12 -0
- data/lib/daru_lite/iruby/templates/vector.html.erb +5 -0
- data/lib/daru_lite/iruby/templates/vector_mi.html.erb +5 -0
- data/lib/daru_lite/iruby/templates/vector_mi_tbody.html.erb +26 -0
- data/lib/daru_lite/iruby/templates/vector_mi_thead.html.erb +8 -0
- data/lib/daru_lite/iruby/templates/vector_tbody.html.erb +17 -0
- data/lib/daru_lite/iruby/templates/vector_thead.html.erb +8 -0
- data/lib/daru_lite/maths/arithmetic/dataframe.rb +91 -0
- data/lib/daru_lite/maths/arithmetic/vector.rb +117 -0
- data/lib/daru_lite/maths/statistics/dataframe.rb +202 -0
- data/lib/daru_lite/maths/statistics/vector.rb +1019 -0
- data/lib/daru_lite/monkeys.rb +56 -0
- data/lib/daru_lite/vector.rb +1678 -0
- data/lib/daru_lite/version.rb +3 -0
- data/lib/daru_lite.rb +99 -0
- data/profile/_base.rb +23 -0
- data/profile/df_to_a.rb +10 -0
- data/profile/filter.rb +13 -0
- data/profile/joining.rb +13 -0
- data/profile/sorting.rb +12 -0
- data/profile/vector_each_with_index.rb +9 -0
- data/profile/vector_new.rb +9 -0
- data/spec/accessors/array_wrapper_spec.rb +3 -0
- data/spec/category_spec.rb +1741 -0
- data/spec/core/group_by_spec.rb +655 -0
- data/spec/core/merge_spec.rb +179 -0
- data/spec/core/query_spec.rb +347 -0
- data/spec/daru_lite_spec.rb +22 -0
- data/spec/dataframe_spec.rb +4330 -0
- data/spec/date_time/data_spec.rb +197 -0
- data/spec/date_time/date_time_index_helper_spec.rb +72 -0
- data/spec/date_time/index_spec.rb +588 -0
- data/spec/date_time/offsets_spec.rb +465 -0
- data/spec/extensions/which_dsl_spec.rb +38 -0
- data/spec/fixtures/bank2.dat +200 -0
- data/spec/fixtures/boolean_converter_test.csv +5 -0
- data/spec/fixtures/countries.json +7794 -0
- data/spec/fixtures/duplicates.csv +32 -0
- data/spec/fixtures/eciresults.html +394 -0
- data/spec/fixtures/empties.dat +2 -0
- data/spec/fixtures/empty_rows_test.csv +17 -0
- data/spec/fixtures/macau.html +3691 -0
- data/spec/fixtures/macd_data.csv +150 -0
- data/spec/fixtures/matrix_test.csv +100 -0
- data/spec/fixtures/moneycontrol.html +6812 -0
- data/spec/fixtures/music_data.tsv +2501 -0
- data/spec/fixtures/repeated_fields.csv +7 -0
- data/spec/fixtures/sales-funnel.csv +18 -0
- data/spec/fixtures/scientific_notation.csv +4 -0
- data/spec/fixtures/string_converter_test.csv +5 -0
- data/spec/fixtures/strings.dat +2 -0
- data/spec/fixtures/test_xls.xls +0 -0
- data/spec/fixtures/test_xls_2.xls +0 -0
- data/spec/fixtures/url_test.txt~ +0 -0
- data/spec/fixtures/valid_markup.html +62 -0
- data/spec/fixtures/wiki_climate.html +1243 -0
- data/spec/fixtures/wiki_table_info.html +631 -0
- data/spec/formatters/table_formatter_spec.rb +137 -0
- data/spec/helpers_spec.rb +8 -0
- data/spec/index/categorical_index_spec.rb +170 -0
- data/spec/index/index_spec.rb +417 -0
- data/spec/index/multi_index_spec.rb +680 -0
- data/spec/io/io_spec.rb +373 -0
- data/spec/io/sql_data_source_spec.rb +56 -0
- data/spec/iruby/dataframe_spec.rb +170 -0
- data/spec/iruby/helpers_spec.rb +49 -0
- data/spec/iruby/multi_index_spec.rb +37 -0
- data/spec/iruby/vector_spec.rb +105 -0
- data/spec/maths/arithmetic/dataframe_spec.rb +148 -0
- data/spec/maths/arithmetic/vector_spec.rb +165 -0
- data/spec/maths/statistics/dataframe_spec.rb +178 -0
- data/spec/maths/statistics/vector_spec.rb +756 -0
- data/spec/monkeys_spec.rb +42 -0
- data/spec/shared/vector_display_spec.rb +213 -0
- data/spec/spec_helper.rb +87 -0
- data/spec/support/database_helper.rb +30 -0
- data/spec/support/matchers.rb +5 -0
- data/spec/vector_spec.rb +2293 -0
- metadata +571 -0
data/lib/daru_lite.rb
ADDED
@@ -0,0 +1,99 @@
|
|
1
|
+
# :nocov:
# Tells whether the running interpreter identifies itself as JRuby.
#
# @return [Boolean] true when RUBY_ENGINE equals 'jruby', false otherwise
def jruby?
  RUBY_ENGINE.eql?('jruby')
end
# :nocov:
|
6
|
+
|
7
|
+
# Top-level namespace holding shared constants and module-wide settings.
module DaruLite
  # Upper-case three-letter weekday abbreviation => weekday number, Sunday being 0.
  DAYS_OF_WEEK = %w[SUN MON TUE WED THU FRI SAT].each_with_index.to_h.freeze

  # Month number (1..12) => number of days in that month; February is listed as 28.
  MONTH_DAYS = {
    1 => 31, 2 => 28, 3 => 31, 4 => 30, 5 => 31, 6 => 30,
    7 => 31, 8 => 31, 9 => 30, 10 => 31, 11 => 30, 12 => 31
  }.freeze

  # The two missing-value markers: nil and NaN.
  MISSING_VALUES = [nil, Float::NAN].freeze

  SPLIT_TOKEN = ','.freeze

  # Defaults for the module-level settings exposed through the accessors below.
  @lazy_update  = false
  @error_stream = $stderr

  class << self
    # lazy_update decides whether Vector metadata is updated immediately or lazily.
    # When updating lazily, call the #update method every time values are set or
    # removed in order to refresh metadata such as the positions of missing values.
    #
    # error_stream is the IO-like object #error writes to (nil disables output).
    attr_accessor :lazy_update, :error_stream

    # Defines a memoized predicate `has_<library>?` on this module. Dashes in the
    # library name become underscores in the predicate name.
    #
    # The first call of the predicate tries to require the library and stores the
    # outcome in a class variable named after the library; later calls only read
    # that stored value.
    #
    # @param library [#to_s] name of the library as it would be passed to require
    # @return the result of define_singleton_method
    def create_has_library(library)
      sanitized = library.to_s.tr('-', '_')
      flag_name = "@@#{sanitized}"

      define_singleton_method(:"has_#{sanitized}?") do
        unless class_variable_defined?(flag_name)
          available =
            begin
              require library.to_s
              true
            rescue LoadError
              # :nocov:
              false
              # :nocov:
            end
          class_variable_set(flag_name, available)
        end

        class_variable_get(flag_name)
      end
    end

    # Writes msg to the configured error stream; does nothing when the stream is nil.
    #
    # @param msg [Object] message handed to the stream's #puts
    def error(msg)
      stream = error_stream
      stream.puts msg unless stream.nil?
    end
  end
end
|
70
|
+
|
71
|
+
autoload :CSV, 'csv'
|
72
|
+
require 'matrix'
|
73
|
+
require 'forwardable'
|
74
|
+
require 'erb'
|
75
|
+
require 'date'
|
76
|
+
|
77
|
+
require 'daru_lite/version'
|
78
|
+
|
79
|
+
require 'open-uri'
|
80
|
+
|
81
|
+
require 'daru_lite/index/index'
|
82
|
+
require 'daru_lite/index/multi_index'
|
83
|
+
require 'daru_lite/index/categorical_index'
|
84
|
+
|
85
|
+
require 'daru_lite/helpers/array'
|
86
|
+
require 'daru_lite/configuration'
|
87
|
+
require 'daru_lite/vector'
|
88
|
+
require 'daru_lite/dataframe'
|
89
|
+
require 'daru_lite/monkeys'
|
90
|
+
require 'daru_lite/formatters/table'
|
91
|
+
require 'daru_lite/iruby/helpers'
|
92
|
+
require 'daru_lite/exceptions'
|
93
|
+
|
94
|
+
require 'daru_lite/core/group_by'
|
95
|
+
require 'daru_lite/core/query'
|
96
|
+
require 'daru_lite/core/merge'
|
97
|
+
|
98
|
+
require 'daru_lite/date_time/offsets'
|
99
|
+
require 'daru_lite/date_time/index'
|
data/profile/_base.rb
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
$:.unshift File.expand_path("../../lib", __FILE__)
|
2
|
+
|
3
|
+
require 'ruby-prof'
|
4
|
+
require 'fileutils'
|
5
|
+
|
6
|
+
require 'daru_lite'
|
7
|
+
|
8
|
+
# Profiles the given block with ruby-prof and writes an HTML graph report to
# out/<name>.html, resolved relative to this file. The output directory is
# created when it does not exist yet.
#
# @param name [String, nil] base name of the report file; when omitted it is
#   inferred from the file name of the calling script
# @yield the code to be profiled
# @return whatever RubyProf::GraphHtmlPrinter#print returns
def __profile__(name = nil)
  # infers name to be "sorting" when called from "profile/sorting.rb:10:in `<main>'".
  # caller_locations exposes the path directly, so a drive-letter colon in the
  # path (e.g. "C:/...") cannot truncate the inferred name.
  name ||= File.basename(caller_locations(1, 1).first.path, '.rb')

  path = File.expand_path("../out/#{name}.html", __FILE__)
  FileUtils.mkdir_p File.dirname(path)

  RubyProf.start

  yield

  res = RubyProf.stop

  # Block form of File.open flushes and closes the report once it is written,
  # instead of leaving the handle open until garbage collection.
  File.open(path, 'w') do |report|
    RubyProf::GraphHtmlPrinter.new(res).print(report)
  end
end
|
data/profile/df_to_a.rb
ADDED
data/profile/filter.rb
ADDED
@@ -0,0 +1,13 @@
|
|
1
|
+
require_relative '_base'
|
2
|
+
|
3
|
+
# Build a 6000-row frame: each column repeats a six-element pattern 1000 times,
# and the index holds the labels 1..6000 in shuffled order.
df = DaruLite::DataFrame.new(
  {
    a: [1, 2, 3, 4, 5, 6] * 1000,
    b: %w[a b c d e f] * 1000,
    c: [11, 22, 33, 44, 55, 66] * 1000
  },
  index: (1..6000).to_a.shuffle
)

# Profile a row-wise filter that keeps rows where :a is 2 or :c is 55.
__profile__ do
  df.filter(:row) { |row| row[:a] == 2 || row[:c] == 55 }
end
|
data/profile/joining.rb
ADDED
@@ -0,0 +1,13 @@
|
|
1
|
+
require_relative '_base'
|
2
|
+
|
3
|
+
# Number of rows in both frames.
n = 40_000
keys = (1..n).to_a

# Left side: a running idx column plus a :keys column of n random picks from keys.
base_data = {
  idx: (1..n).to_a,
  keys: Array.new(n) { keys[Random.rand(n)] }
}

# Right side: every key mapped to one hundred times its value.
lookup_hash = keys.each_with_object({}) { |key, table| table[key] = key * 100 }

base_data_df = DaruLite::DataFrame.new(base_data)
lookup_df = DaruLite::DataFrame.new(keys: lookup_hash.keys, values: lookup_hash.values)

# Profile an inner join of the two frames on the :keys column.
__profile__ do
  base_data_df.join(lookup_df, on: [:keys], how: :inner)
end
|
data/profile/sorting.rb
ADDED