rdatasets 0.4.2 → 0.5.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile.lock +3 -4
- data/README.md +10 -8
- data/lib/rdatasets.rb +48 -28
- data/lib/rdatasets/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 3a0ea0983b1aa1676ee7fa9f3ef1d425a77dcbcebfe8076063937db80ab07a2e
|
4
|
+
data.tar.gz: 4ce3fef0dfd07140504bf7a4dcf6ae0f8d99f6be6f8f203532b07318ec1470c1
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 11810491ce07714ee2a5f7c9799d51541b530f7138754f5f5478e5bdff779c3a44906d8fe6f39e9451dba377318b8990eb26648c8e58cabbdee9c51a4a48935f
|
7
|
+
data.tar.gz: bc66e82486aee75f2609421a2bc86fa39075b903ac88b58ee312fc54e7f69464735f15fcaea7abf7bf343bc4b64a4f581f01bc7761ae9bbed58561409567f01f
|
data/Gemfile.lock
CHANGED
@@ -1,20 +1,20 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
rdatasets (0.4.2)
|
4
|
+
rdatasets (0.5.0)
|
5
5
|
daru
|
6
6
|
|
7
7
|
GEM
|
8
8
|
remote: https://rubygems.org/
|
9
9
|
specs:
|
10
|
-
backports (3.
|
10
|
+
backports (3.12.0)
|
11
11
|
daru (0.2.1)
|
12
12
|
backports
|
13
13
|
packable (~> 1.3.9)
|
14
14
|
diff-lcs (1.3)
|
15
15
|
packable (1.3.10)
|
16
16
|
backports
|
17
|
-
parallel (1.
|
17
|
+
parallel (1.14.0)
|
18
18
|
parallel_tests (2.28.0)
|
19
19
|
parallel
|
20
20
|
rake (10.5.0)
|
@@ -34,7 +34,6 @@ GEM
|
|
34
34
|
|
35
35
|
PLATFORMS
|
36
36
|
ruby
|
37
|
-
x64-mingw32
|
38
37
|
|
39
38
|
DEPENDENCIES
|
40
39
|
bundler (~> 2.0)
|
data/README.md
CHANGED
@@ -16,20 +16,22 @@ under development
|
|
16
16
|
gem install rdatasets
|
17
17
|
```
|
18
18
|
|
19
|
-
```bash
|
20
|
-
git clone https://github.com/kojix2/rdatasets
|
21
|
-
cd rdatasets
|
22
|
-
bundle install
|
23
|
-
bundle exec rake install
|
24
|
-
```
|
25
|
-
|
26
19
|
## Usage
|
27
20
|
|
28
21
|
```ruby
|
29
22
|
require 'rdatasets'
|
30
23
|
df = Daru::DataFrame.from_rdatasets("datasets","iris")
|
31
|
-
df = RDatasets.load
|
24
|
+
df = RDatasets.load "datasets", "iris"
|
25
|
+
df = RDatasets.load :datasets, :iris
|
26
|
+
df = RDatasets.datasets.iris
|
32
27
|
# returns Daru::DataFrame
|
28
|
+
|
29
|
+
# available datasets
|
30
|
+
df = RDatasets.df
|
31
|
+
|
32
|
+
# search
|
33
|
+
RDatasets.search "diamonds"
|
34
|
+
RDatasets.search /diamonds/
|
33
35
|
```
|
34
36
|
|
35
37
|
## Development
|
data/lib/rdatasets.rb
CHANGED
@@ -3,6 +3,31 @@ require 'daru'
|
|
3
3
|
|
4
4
|
# Module for RDatasets
|
5
5
|
module RDatasets
|
6
|
+
class Package
|
7
|
+
def initialize(package_name)
|
8
|
+
@package_name = package_name
|
9
|
+
@datasets = RDatasets.package package_name
|
10
|
+
end
|
11
|
+
|
12
|
+
private
|
13
|
+
|
14
|
+
def method_missing(name)
|
15
|
+
return RDatasets.load @package_name, name if @datasets.include? name
|
16
|
+
|
17
|
+
super
|
18
|
+
end
|
19
|
+
end
|
20
|
+
|
21
|
+
private_constant :Package
|
22
|
+
|
23
|
+
private
|
24
|
+
|
25
|
+
def self.method_missing(package_name)
|
26
|
+
return Package.new(package_name) if RDatasets.packages.include? package_name
|
27
|
+
|
28
|
+
super
|
29
|
+
end
|
30
|
+
|
6
31
|
module_function
|
7
32
|
|
8
33
|
# Load a certain dataset and returns a dataframe.
|
@@ -12,15 +37,15 @@ module RDatasets
|
|
12
37
|
def load(package_name, dataset_name = nil)
|
13
38
|
if dataset_name
|
14
39
|
file_path = filepath(package_name, dataset_name)
|
15
|
-
|
16
|
-
if original_index_is_sequential?
|
17
|
-
# `
|
18
|
-
|
19
|
-
|
40
|
+
dataframe = Daru::DataFrame.from_csv(file_path)
|
41
|
+
if original_index_is_sequential? dataframe
|
42
|
+
# `dataframe.set_index` is slow
|
43
|
+
dataframe.index = dataframe.at 0
|
44
|
+
dataframe.delete_vector dataframe.at(0).name
|
20
45
|
end
|
21
|
-
|
46
|
+
dataframe
|
22
47
|
else
|
23
|
-
|
48
|
+
package(package_name)
|
24
49
|
end
|
25
50
|
end
|
26
51
|
|
@@ -42,44 +67,39 @@ module RDatasets
|
|
42
67
|
|
43
68
|
# Display information of all data sets.
|
44
69
|
# @return [Daru::DataFrame]
|
45
|
-
def
|
70
|
+
def df
|
46
71
|
file_path = File.expand_path('../data/datasets.csv', __dir__)
|
47
72
|
Daru::DataFrame.from_csv(file_path)
|
48
73
|
end
|
49
74
|
|
50
|
-
#
|
51
|
-
# @
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
#
|
57
|
-
#
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
ds['Item'].to_a.map(&:to_sym)
|
63
|
-
else
|
64
|
-
datasets['Package'].to_a.uniq.map(&:to_sym)
|
65
|
-
end
|
75
|
+
# Show a list of all packages.
|
76
|
+
# @return [Array<Symbol>]
|
77
|
+
def packages
|
78
|
+
df['Package'].to_a.uniq.map(&:to_sym)
|
79
|
+
end
|
80
|
+
|
81
|
+
# Show a list of datasets included in the package.
|
82
|
+
# @param [String, Symbol] :R package name
|
83
|
+
# @return [Array<Symbol>]
|
84
|
+
def package(package_name)
|
85
|
+
ds = df.where(df['Package'].eq package_name.to_s)
|
86
|
+
ds['Item'].to_a.map(&:to_sym)
|
66
87
|
end
|
67
88
|
|
68
|
-
# Search available datasets. (items and titles)
|
89
|
+
# Search available datasets. (items and titles)
|
69
90
|
# If the argument is a string, ignore case.
|
70
91
|
# @param pattern [String, Regexp] :The pattern to search for
|
71
92
|
# @return [Daru::DataFrame]
|
72
93
|
def search(pattern)
|
73
94
|
pattern = /#{pattern}/i if pattern.is_a? String
|
74
|
-
|
75
|
-
datasets.filter(:row) do |row|
|
95
|
+
df.filter(:row) do |row|
|
76
96
|
row['Item'] =~ pattern || row['Title'] =~ pattern
|
77
97
|
end
|
78
98
|
end
|
79
99
|
|
80
100
|
# Check if the index of original r dataset is sequential.
|
81
101
|
def original_index_is_sequential?(dataframe)
|
82
|
-
dataframe
|
102
|
+
dataframe.at(0).to_a == [*1..dataframe.size]
|
83
103
|
end
|
84
104
|
private_class_method :original_index_is_sequential?
|
85
105
|
end
|
data/lib/rdatasets/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rdatasets
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.4.2
|
4
|
+
version: 0.5.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- kojix2
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2019-
|
11
|
+
date: 2019-03-03 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: daru
|