pycall 0.1.0.alpha.20170502 → 0.1.0.alpha.20170711

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,261 @@
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "markdown",
5
+ "metadata": {
6
+ "deletable": true,
7
+ "editable": true
8
+ },
9
+ "source": [
10
+ "# Example of using feature importances calculated by ExtraTreesClassifier"
11
+ ]
12
+ },
13
+ {
14
+ "cell_type": "code",
15
+ "execution_count": 1,
16
+ "metadata": {
17
+ "collapsed": false,
18
+ "deletable": true,
19
+ "editable": true
20
+ },
21
+ "outputs": [],
22
+ "source": [
23
+ "require 'matplotlib/iruby'\n",
24
+ "Matplotlib::IRuby.activate\n",
25
+ "plt = Matplotlib::Pyplot\n",
26
+ "nil"
27
+ ]
28
+ },
29
+ {
30
+ "cell_type": "code",
31
+ "execution_count": 2,
32
+ "metadata": {
33
+ "collapsed": false,
34
+ "deletable": true,
35
+ "editable": true,
36
+ "scrolled": true
37
+ },
38
+ "outputs": [],
39
+ "source": [
40
+ "require 'pycall/import'\n",
41
+ "include PyCall::Import\n",
42
+ "pyimport 'numpy', as: 'np'\n",
43
+ "pyfrom 'sklearn.datasets', import: :fetch_olivetti_faces\n",
44
+ "pyfrom 'sklearn.datasets', import: :make_classification\n",
45
+ "pyfrom 'sklearn.ensemble', import: :ExtraTreesClassifier\n",
46
+ "nil"
47
+ ]
48
+ },
49
+ {
50
+ "cell_type": "markdown",
51
+ "metadata": {
52
+ "deletable": true,
53
+ "editable": true
54
+ },
55
+ "source": [
56
+ "## (1) Feature importances with forests of trees\n",
57
+ "\n",
58
+ "This section is based on the following example from the scikit-learn documentation:\n",
59
+ "\n",
60
+ "- http://scikit-learn.org/stable/auto_examples/ensemble/plot_forest_importances.html"
61
+ ]
62
+ },
63
+ {
64
+ "cell_type": "code",
65
+ "execution_count": 3,
66
+ "metadata": {
67
+ "collapsed": false,
68
+ "deletable": true,
69
+ "editable": true
70
+ },
71
+ "outputs": [
72
+ {
73
+ "name": "stdout",
74
+ "output_type": "stream",
75
+ "text": [
76
+ "Feature ranking:\n",
77
+ "1. feature 1 (0.295902)\n",
78
+ "2. feature 2 (0.208351)\n",
79
+ "3. feature 0 (0.177632)\n",
80
+ "4. feature 3 (0.047121)\n",
81
+ "5. feature 6 (0.046303)\n",
82
+ "6. feature 8 (0.046013)\n",
83
+ "7. feature 7 (0.045575)\n",
84
+ "8. feature 4 (0.044614)\n",
85
+ "9. feature 9 (0.044577)\n",
86
+ "10. feature 5 (0.043912)\n"
87
+ ]
88
+ },
89
+ {
90
+ "data": {
91
+ "image/png": "",
92
+ "text/plain": [
93
+ "<matplotlib.figure.Figure object at 0x108030f98>"
94
+ ]
95
+ },
96
+ "execution_count": 3,
97
+ "metadata": {},
98
+ "output_type": "execute_result"
99
+ },
100
+ {
101
+ "data": {
102
+ "text/plain": [
103
+ "(-1, 10)"
104
+ ]
105
+ },
106
+ "execution_count": 3,
107
+ "metadata": {},
108
+ "output_type": "execute_result"
109
+ }
110
+ ],
111
+ "source": [
112
+ "# Build a classification task using 3 informative features\n",
113
+ "x, y = make_classification.(\n",
114
+ " n_samples: 1000,\n",
115
+ " n_features: 10,\n",
116
+ " n_informative: 3,\n",
117
+ " n_redundant: 0,\n",
118
+ " n_repeated: 0,\n",
119
+ " n_classes: 2,\n",
120
+ " random_state: 0,\n",
121
+ " shuffle: false\n",
122
+ ")\n",
123
+ "\n",
124
+ "# Build a forest and compute the feature importances\n",
125
+ "forest = ExtraTreesClassifier.(n_estimators: 250, random_state: 0)\n",
126
+ "\n",
127
+ "forest.fit.(x, y)\n",
128
+ "importances = forest.feature_importances_\n",
129
+ "std = np.std.(forest.estimators_.map {|tree| tree.feature_importances_ }, axis: 0)\n",
130
+ "indices = np.argsort.(importances)[PyCall.slice(nil, nil, -1)]\n",
131
+ "\n",
132
+ "# Print the feature ranking\n",
133
+ "puts \"Feature ranking:\"\n",
134
+ "\n",
135
+ "x.shape[1].times do |f|\n",
136
+ " puts \"%d. feature %d (%f)\" % [f + 1, PyCall.int(indices[f]), importances[indices[f]]]\n",
137
+ "end\n",
138
+ "\n",
139
+ "# Plot the feature importances of the forest\n",
140
+ "plt.figure()\n",
141
+ "plt.title(\"Feature importances\")\n",
142
+ "plt.bar([*0...x.shape[1]], importances[indices],\n",
143
+ " color: \"r\", yerr: std[indices], align: \"center\")\n",
144
+ "plt.xticks([*0...x.shape[1]], indices)\n",
145
+ "plt.xlim([-1, x.shape[1]])"
146
+ ]
147
+ },
148
+ {
149
+ "cell_type": "markdown",
150
+ "metadata": {
151
+ "deletable": true,
152
+ "editable": true
153
+ },
154
+ "source": [
155
+ "## (2) Pixel importances with a parallel forest of trees\n",
156
+ "\n",
157
+ "This section is based on the following example from the scikit-learn documentation:\n",
158
+ "\n",
159
+ "- http://scikit-learn.org/stable/auto_examples/ensemble/plot_forest_importances_faces.html"
160
+ ]
161
+ },
162
+ {
163
+ "cell_type": "code",
164
+ "execution_count": 4,
165
+ "metadata": {
166
+ "collapsed": false,
167
+ "deletable": true,
168
+ "editable": true
169
+ },
170
+ "outputs": [
171
+ {
172
+ "name": "stdout",
173
+ "output_type": "stream",
174
+ "text": [
175
+ "Fitting ExtraTreesClassifier on faces data with 1 cores...\n",
176
+ "done in 1.304s\n"
177
+ ]
178
+ },
179
+ {
180
+ "data": {
181
+ "image/png": "",
182
+ "text/plain": [
183
+ "<matplotlib.figure.Figure object at 0x1099ed048>"
184
+ ]
185
+ },
186
+ "execution_count": 4,
187
+ "metadata": {},
188
+ "output_type": "execute_result"
189
+ },
190
+ {
191
+ "data": {
192
+ "text/plain": [
193
+ "<matplotlib.text.Text object at 0x109994d68>"
194
+ ]
195
+ },
196
+ "execution_count": 4,
197
+ "metadata": {},
198
+ "output_type": "execute_result"
199
+ }
200
+ ],
201
+ "source": [
202
+ "# Number of cores to use to perform parallel fitting of the forest model\n",
203
+ "n_jobs = 1\n",
204
+ "\n",
205
+ "# Load the Olivetti faces dataset\n",
206
+ "data = fetch_olivetti_faces.()\n",
207
+ "x = data.images.reshape.(PyCall.tuple(PyCall.len(data.images), -1))\n",
208
+ "y = data.target\n",
209
+ "\n",
210
+ "mask = y < 5 # Limit to 5 classes\n",
211
+ "x = x[mask]\n",
212
+ "y = y[mask]\n",
213
+ "\n",
214
+ "# Build a forest and compute the pixel importances\n",
215
+ "puts \"Fitting ExtraTreesClassifier on faces data with #{n_jobs} cores...\"\n",
216
+ "t0 = Time.now\n",
217
+ "forest = ExtraTreesClassifier.(\n",
218
+ " n_estimators: 1_000,\n",
219
+ " max_features: 128,\n",
220
+ " n_jobs: n_jobs,\n",
221
+ " random_state: 0\n",
222
+ ")\n",
223
+ "\n",
224
+ "forest = forest.fit.(x, y)\n",
225
+ "puts \"done in %0.3fs\" % (Time.now - t0)\n",
226
+ "importances = forest.feature_importances_\n",
227
+ "importances = importances.reshape.(data.images[0].shape)\n",
228
+ "\n",
229
+ "# Plot pixel importances\n",
230
+ "plt.matshow(importances, cmap: plt.cm.hot)\n",
231
+ "plt.title(\"Pixel importances with forests of trees\")"
232
+ ]
233
+ },
234
+ {
235
+ "cell_type": "code",
236
+ "execution_count": null,
237
+ "metadata": {
238
+ "collapsed": true,
239
+ "deletable": true,
240
+ "editable": true
241
+ },
242
+ "outputs": [],
243
+ "source": []
244
+ }
245
+ ],
246
+ "metadata": {
247
+ "kernelspec": {
248
+ "display_name": "Ruby 2.4.0",
249
+ "language": "ruby",
250
+ "name": "ruby"
251
+ },
252
+ "language_info": {
253
+ "file_extension": ".rb",
254
+ "mimetype": "application/x-ruby",
255
+ "name": "ruby",
256
+ "version": "2.4.0"
257
+ }
258
+ },
259
+ "nbformat": 4,
260
+ "nbformat_minor": 2
261
+ }