rabbit-slide-kou-data-science-rb 2017.5.19.2 → 2017.5.19.3

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 8695ae18768ed7c379be63ca652892e35e6f375b
4
- data.tar.gz: 18f3b2b84d73480199907dd6c6f7f07c96e175ff
3
+ metadata.gz: 43bab7a71fb5de0ab27f1b82377c5164aaa519ca
4
+ data.tar.gz: 5c7b4e2166efc143d8e43f2739957f6cfeea232d
5
5
  SHA512:
6
- metadata.gz: b2cad6aec92ce83e33c0db57466363103da2c8447e99520af768f8013153817465de9066a365d1b6f91998852b9621a71f62a8f7d3ea00584ecf20cd3a722e66
7
- data.tar.gz: 4f48ab9fd025c9a79633a2fea320e177a45a5fe84641e9da09285225076787be3a592e50c332e4f698a1fa23be8d41f61fbd0fb7c5496f20bda9ec7e9118abe6
6
+ metadata.gz: 69473a1db8461ad44afe52498c83590269a903d8144b33f3a0ab07ac939b04d22fcfc8cc523101d11e3d27a1a835716a187d02703060f27767acc540f989946f
7
+ data.tar.gz: 39eefdc301d3f32ec74f55e22a33f3970d7346e93baab62be512f349ecd593fe89818565611d3c6e615c0b586c5cd90e89441e9a78031f328e94521f32c3e07c
@@ -7,7 +7,7 @@ tags:
7
7
  - arrow
8
8
  - data
9
9
  presentation_date: 2017-05-19
10
- version: 2017.5.19.2
10
+ version: 2017.5.19.3
11
11
  licenses:
12
12
  - CC-BY-SA-4.0
13
13
  slideshare_id: datasciencerb
@@ -117,13 +117,14 @@ Rubyでやりたい
117
117
 
118
118
  # rouge python
119
119
  # pandasでデータ生成→Arrow形式で書き込み
120
+ import pandas as pd
120
121
  import pyarrow as pa
121
122
 
122
123
  df = pd.DataFrame({"a": [1, 2, 3],
123
124
  "b": ["hello", "world", "!"]})
124
125
  record_batch = pa.RecordBatch.from_pandas(df)
125
126
 
126
- with pa.OSFile("/tmp/pandas.arrow", "wb") as sink:
127
+ with pa.OSFile("/dev/shm/pandas.arrow", "wb") as sink:
127
128
  schema = record_batch.schema
128
129
  writer = pa.RecordBatchFileWriter(sink, schema)
129
130
  writer.write_batch(record_batch)
@@ -136,7 +137,7 @@ Rubyでやりたい
136
137
  require "arrow"
137
138
 
138
139
  Input = Arrow::MemoryMappedInputStream
139
- Input.open("/tmp/pandas.arrow") do |input|
140
+ Input.open("/dev/shm/pandas.arrow") do |input|
140
141
  reader = Arrow::RecordBatchFileReader.new(input)
141
142
  reader.each do |record_batch|
142
143
  puts("=" * 40)
@@ -153,7 +154,7 @@ Rubyでやりたい
153
154
  local Arrow = lgi.Arrow
154
155
 
155
156
  local input_class = Arrow.MemoryMappedInputStream
156
- local input = input_class.new("/tmp/pandas.arrow")
157
+ local input = input_class.new("/dev/shm/pandas.arrow")
157
158
  local reader = Arrow.RecordBatchFileReader.new(input)
158
159
  for i = 0, reader:get_n_record_batches() - 1 do
159
160
  local record_batch = reader:get_record_batch(i)
@@ -171,7 +172,7 @@ Rubyでやりたい
171
172
 
172
173
  df = data.frame(a=c(1, 2, 3),
173
174
  b=c(1.1, 2.2, 3.3))
174
- write_feather(df, "/tmp/dataframe.feather")
175
+ write_feather(df, "/dev/shm/dataframe.feather")
175
176
 
176
177
  = Feather:Ruby
177
178
 
@@ -180,7 +181,7 @@ Rubyでやりたい
180
181
  require "arrow"
181
182
 
182
183
  Input = Arrow::MemoryMappedInputStream
183
- Input.open("/tmp/dataframe.feather") do |input|
184
+ Input.open("/dev/shm/dataframe.feather") do |input|
184
185
  reader = Arrow::FeatherFileReader.new(input)
185
186
  reader.columns.each do |column|
186
187
  puts("#{column.name}: #{column.to_a.inspect}")
@@ -198,7 +199,7 @@ Rubyでやりたい
198
199
  df = pd.DataFrame({"a": [1, 2, 3],
199
200
  "b": ["hello", "world", "!"]})
200
201
  table = pa.Table.from_pandas(df)
201
- pq.write_table(table, "/tmp/pandas.parquet")
202
+ pq.write_table(table, "/dev/shm/pandas.parquet")
202
203
 
203
204
  = Parquet:Ruby
204
205
 
@@ -207,7 +208,7 @@ Rubyでやりたい
207
208
  require "arrow"
208
209
  require "parquet"
209
210
 
210
- path = "/tmp/pandas.parquet"
211
+ path = "/dev/shm/pandas.parquet"
211
212
  reader = Parquet::ArrowFileReader.new(path)
212
213
  table = reader.read_table
213
214
  table.each_column do |column|
@@ -237,14 +238,14 @@ Rubyでやりたい
237
238
  # rouge ruby
238
239
  # 空のテーブルにArrow形式のデータを読み込む
239
240
  logs = Groonga::Array.create(name: "logs")
240
- logs.load_arrow("/tmp/pandas.arrow")
241
+ logs.load_arrow("/dev/shm/pandas.arrow")
241
242
  logs.each {|record| p record.attributes}
242
243
  # フィルター
243
244
  filtered_logs = logs.select do |record|
244
245
  record.b =~ "hello" # "hello"で全文検索
245
246
  end
246
247
  # フィルター結果をArrow形式で書き込み
247
- filtered_logs.dump_arrow("/tmp/filtered.arrow",
248
+ filtered_logs.dump_arrow("/dev/shm/filtered.arrow",
248
249
  column_names: ["a", "b"])
249
250
 
250
251
  = Groonga:Python
@@ -253,7 +254,7 @@ Rubyでやりたい
253
254
  # Arrow形式のGroongaでのフィルター結果を読み込む
254
255
  import pyarrow as pa
255
256
 
256
- with pa.OSFile("/tmp/filtered.arrow") as source:
257
+ with pa.OSFile("/dev/shm/filtered.arrow") as source:
257
258
  writer = pa.RecordBatchFileReader(source)
258
259
  print(writer.get_record_batch(0).to_pandas())
259
260
 
@@ -294,7 +295,7 @@ Rubyでやりたい
294
295
  ndarray = np.random.randn(10, 6) # 10x6
295
296
  print(ndarray)
296
297
  tensor = pa.Tensor.from_numpy(ndarray)
297
- with pa.OSFile("/tmp/tensor.arrow", "wb") as sink:
298
+ with pa.OSFile("/dev/shm/tensor.arrow", "wb") as sink:
298
299
  pa.write_tensor(tensor, sink)
299
300
 
300
301
  = Tensor:Ruby
@@ -304,7 +305,7 @@ Rubyでやりたい
304
305
  require "arrow"
305
306
 
306
307
  Input = Arrow::MemoryMappedInputStream
307
- Input.open("/tmp/tensor.arrow") do |input|
308
+ Input.open("/dev/shm/tensor.arrow") do |input|
308
309
  tensor = input.read_tensor(0)
309
310
  p tensor.shape # => [10, 6]
310
311
  end
@@ -318,7 +319,7 @@ Rubyでやりたい
318
319
  require "pp"
319
320
 
320
321
  Input = Arrow::MemoryMappedInputStream
321
- Input.open("/tmp/tensor.arrow") do |input|
322
+ Input.open("/dev/shm/tensor.arrow") do |input|
322
323
  tensor = input.read_tensor(0)
323
324
  pp tensor.to_gsl
324
325
  # tensor.to_gsl.to_arrow == tensor
@@ -333,7 +334,7 @@ Rubyでやりたい
333
334
  require "pp"
334
335
 
335
336
  Input = Arrow::MemoryMappedInputStream
336
- Input.open("/tmp/tensor.arrow") do |input|
337
+ Input.open("/dev/shm/tensor.arrow") do |input|
337
338
  tensor = input.read_tensor(0)
338
339
  pp tensor.to_nmatrix
339
340
  # tensor.to_nmatrix.to_arrow == tensor
@@ -348,7 +349,7 @@ Rubyでやりたい
348
349
  require "pp"
349
350
 
350
351
  Input = Arrow::MemoryMappedInputStream
351
- Input.open("/tmp/tensor.arrow") do |input|
352
+ Input.open("/dev/shm/tensor.arrow") do |input|
352
353
  tensor = input.read_tensor(0)
353
354
  pp tensor.to_narray
354
355
  # tensor.to_narray.to_arrow == tensor
@@ -382,8 +383,8 @@ Rubyでやりたい
382
383
 
383
384
  * Rubyでデータ処理したいなぁ!\n
384
385
  の実現を目指すプロジェクト
385
- * URL:
386
- * https://github.io/red-data-tools
386
+ * リソース:
387
+ * GitHub: red-data-tools
387
388
  * https://red-data-tools.github.io
388
389
  * https://gitter.im/red-data-tools
389
390
 
@@ -457,7 +458,7 @@ Because we use Ruby!
457
458
 
458
459
  * Rubyでデータ処理したい人!
459
460
  * ポリシーに同意できる人!
460
- * URL:
461
- * https://github.io/red-data-tools
461
+ * リソース:
462
+ * GitHub: red-data-tools
462
463
  * https://red-data-tools.github.io
463
464
  * https://gitter.im/red-data-tools
@@ -0,0 +1,11 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require "arrow"
4
+ require "arrow-gsl"
5
+
6
+ require "pp"
7
+
8
+ Arrow::MemoryMappedInputStream.open("/tmp/tensor.arrow") do |input|
9
+ tensor = input.read_tensor(0)
10
+ pp tensor.to_gsl
11
+ end
@@ -0,0 +1,11 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require "arrow"
4
+ require "arrow-nmatrix"
5
+
6
+ require "pp"
7
+
8
+ Arrow::MemoryMappedInputStream.open("/tmp/tensor.arrow") do |input|
9
+ tensor = input.read_tensor(0)
10
+ pp tensor.to_nmatrix
11
+ end
@@ -0,0 +1,11 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require "arrow"
4
+ require "arrow-numo-narray"
5
+
6
+ require "pp"
7
+
8
+ Arrow::MemoryMappedInputStream.open("/tmp/tensor.arrow") do |input|
9
+ tensor = input.read_tensor(0)
10
+ pp tensor.to_narray
11
+ end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rabbit-slide-kou-data-science-rb
3
3
  version: !ruby/object:Gem::Version
4
- version: 2017.5.19.2
4
+ version: 2017.5.19.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Kouhei Sutou
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2017-05-19 00:00:00.000000000 Z
11
+ date: 2017-05-20 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rabbit
@@ -59,6 +59,9 @@ files:
59
59
  - sample/read-pandas.lua
60
60
  - sample/read-pandas.rb
61
61
  - sample/read-parquet.rb
62
+ - sample/read-tensor-gsl.rb
63
+ - sample/read-tensor-nmatrix.rb
64
+ - sample/read-tensor-numo-narray.rb
62
65
  - sample/read-tensor.rb
63
66
  - sample/write-feather.R
64
67
  - sample/write-pandas.py