rabbit-slide-kou-data-science-rb 2017.5.19.1 → 2017.5.19.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 8695ae18768ed7c379be63ca652892e35e6f375b
|
4
|
+
data.tar.gz: 18f3b2b84d73480199907dd6c6f7f07c96e175ff
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: b2cad6aec92ce83e33c0db57466363103da2c8447e99520af768f8013153817465de9066a365d1b6f91998852b9621a71f62a8f7d3ea00584ecf20cd3a722e66
|
7
|
+
data.tar.gz: 4f48ab9fd025c9a79633a2fea320e177a45a5fe84641e9da09285225076787be3a592e50c332e4f698a1fa23be8d41f61fbd0fb7c5496f20bda9ec7e9118abe6
|
data/config.yaml
CHANGED
Binary file
|
@@ -79,7 +79,7 @@ Rubyでやりたい
|
|
79
79
|
* 例:PySparkが高速化
|
80
80
|
* 理由:Py🡘Javaのデータ交換コスト減
|
81
81
|
* Java・Python・R以外も活躍
|
82
|
-
* 例:Ruby・Go・Rust
|
82
|
+
* 例:Ruby・Lua・Julia・Go・Rust…
|
83
83
|
* 理由:低コストでデータ交換可能
|
84
84
|
|
85
85
|
= ArrowとRuby
|
@@ -108,7 +108,7 @@ Rubyでやりたい
|
|
108
108
|
|
109
109
|
= 今できること
|
110
110
|
|
111
|
-
* Python・R
|
111
|
+
* Python・R…とのデータ交換
|
112
112
|
* データ処理をいくらか
|
113
113
|
* Rubyの各種ライブラリー間での\n
|
114
114
|
データ交換
|
@@ -144,6 +144,25 @@ Rubyでやりたい
|
|
144
144
|
end
|
145
145
|
end
|
146
146
|
|
147
|
+
= Arrow:Lua
|
148
|
+
|
149
|
+
# rouge lua
|
150
|
+
-- LuaでArrow形式のpandasのデータを読み込み
|
151
|
+
-- Torchへの変換コードはArrowの公式リポジトリーにアリ
|
152
|
+
local lgi = require "lgi"
|
153
|
+
local Arrow = lgi.Arrow
|
154
|
+
|
155
|
+
local input_class = Arrow.MemoryMappedInputStream
|
156
|
+
local input = input_class.new("/tmp/pandas.arrow")
|
157
|
+
local reader = Arrow.RecordBatchFileReader.new(input)
|
158
|
+
for i = 0, reader:get_n_record_batches() - 1 do
|
159
|
+
local record_batch = reader:get_record_batch(i)
|
160
|
+
print(string.rep("=", 40))
|
161
|
+
print("record-batch["..i.."]:")
|
162
|
+
io.write(record_batch:to_string())
|
163
|
+
end
|
164
|
+
input:close()
|
165
|
+
|
147
166
|
= Feather:R
|
148
167
|
|
149
168
|
# rouge R
|
@@ -198,7 +217,7 @@ Rubyでやりたい
|
|
198
217
|
= 対応データ形式まとめ
|
199
218
|
|
200
219
|
* Arrow形式
|
201
|
-
* 各種言語(('note
|
220
|
+
* 各種言語(('note:(これから広く使われるはず)'))
|
202
221
|
* Feather形式
|
203
222
|
* Python・R専用
|
204
223
|
* Parquet形式
|
@@ -253,7 +272,7 @@ Rubyでやりたい
|
|
253
272
|
* 途中でメッセージを参照しないなら
|
254
273
|
* MessagePackからArrowに変える
|
255
274
|
* Arrowのまま出力先へ送る
|
256
|
-
* 途中でRuby
|
275
|
+
* 途中でRubyオブジェクトを作らない\n
|
257
276
|
(('note:シリアライズ・パースがなくなって速い!'))
|
258
277
|
|
259
278
|
= 多次元配列
|
@@ -263,7 +282,7 @@ Rubyでやりたい
|
|
263
282
|
(('note:(traditional multidimensional array objectと説明)'))
|
264
283
|
* C++実装ではサポート
|
265
284
|
* バインディングでは使える
|
266
|
-
* Python・Ruby…では使える
|
285
|
+
* Python・Ruby・Lua…では使える
|
267
286
|
|
268
287
|
= Tensor:Python
|
269
288
|
|
@@ -0,0 +1,15 @@
|
|
1
|
+
#!/usr/bin/env lua
|
2
|
+
|
3
|
+
local lgi = require "lgi"
|
4
|
+
local Arrow = lgi.Arrow
|
5
|
+
|
6
|
+
local input_class = Arrow.MemoryMappedInputStream
|
7
|
+
local input = input_class.new("/tmp/pandas.arrow")
|
8
|
+
local reader = Arrow.RecordBatchFileReader.new(input)
|
9
|
+
for i = 0, reader:get_n_record_batches() - 1 do
|
10
|
+
local record_batch = reader:get_record_batch(i)
|
11
|
+
print(string.rep("=", 40))
|
12
|
+
print("record-batch["..i.."]:")
|
13
|
+
io.write(record_batch:to_string())
|
14
|
+
end
|
15
|
+
input:close()
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rabbit-slide-kou-data-science-rb
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2017.5.19.1
|
4
|
+
version: 2017.5.19.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Kouhei Sutou
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-05-
|
11
|
+
date: 2017-05-19 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rabbit
|
@@ -56,6 +56,7 @@ files:
|
|
56
56
|
- sample/filter-groonga.rb
|
57
57
|
- sample/read-feather.rb
|
58
58
|
- sample/read-groonga.py
|
59
|
+
- sample/read-pandas.lua
|
59
60
|
- sample/read-pandas.rb
|
60
61
|
- sample/read-parquet.rb
|
61
62
|
- sample/read-tensor.rb
|