rabbit-slide-kou-data-science-rb 2017.5.19.2 → 2017.5.19.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 8695ae18768ed7c379be63ca652892e35e6f375b
4
- data.tar.gz: 18f3b2b84d73480199907dd6c6f7f07c96e175ff
3
+ metadata.gz: 43bab7a71fb5de0ab27f1b82377c5164aaa519ca
4
+ data.tar.gz: 5c7b4e2166efc143d8e43f2739957f6cfeea232d
5
5
  SHA512:
6
- metadata.gz: b2cad6aec92ce83e33c0db57466363103da2c8447e99520af768f8013153817465de9066a365d1b6f91998852b9621a71f62a8f7d3ea00584ecf20cd3a722e66
7
- data.tar.gz: 4f48ab9fd025c9a79633a2fea320e177a45a5fe84641e9da09285225076787be3a592e50c332e4f698a1fa23be8d41f61fbd0fb7c5496f20bda9ec7e9118abe6
6
+ metadata.gz: 69473a1db8461ad44afe52498c83590269a903d8144b33f3a0ab07ac939b04d22fcfc8cc523101d11e3d27a1a835716a187d02703060f27767acc540f989946f
7
+ data.tar.gz: 39eefdc301d3f32ec74f55e22a33f3970d7346e93baab62be512f349ecd593fe89818565611d3c6e615c0b586c5cd90e89441e9a78031f328e94521f32c3e07c
@@ -7,7 +7,7 @@ tags:
7
7
  - arrow
8
8
  - data
9
9
  presentation_date: 2017-05-19
10
- version: 2017.5.19.2
10
+ version: 2017.5.19.3
11
11
  licenses:
12
12
  - CC-BY-SA-4.0
13
13
  slideshare_id: datasciencerb
@@ -117,13 +117,14 @@ Rubyでやりたい
117
117
 
118
118
  # rouge python
119
119
  # pandasでデータ生成→Arrow形式で書き込み
120
+ import pandas as pd
120
121
  import pyarrow as pa
121
122
 
122
123
  df = pd.DataFrame({"a": [1, 2, 3],
123
124
  "b": ["hello", "world", "!"]})
124
125
  record_batch = pa.RecordBatch.from_pandas(df)
125
126
 
126
- with pa.OSFile("/tmp/pandas.arrow", "wb") as sink:
127
+ with pa.OSFile("/dev/shm/pandas.arrow", "wb") as sink:
127
128
  schema = record_batch.schema
128
129
  writer = pa.RecordBatchFileWriter(sink, schema)
129
130
  writer.write_batch(record_batch)
@@ -136,7 +137,7 @@ Rubyでやりたい
136
137
  require "arrow"
137
138
 
138
139
  Input = Arrow::MemoryMappedInputStream
139
- Input.open("/tmp/pandas.arrow") do |input|
140
+ Input.open("/dev/shm/pandas.arrow") do |input|
140
141
  reader = Arrow::RecordBatchFileReader.new(input)
141
142
  reader.each do |record_batch|
142
143
  puts("=" * 40)
@@ -153,7 +154,7 @@ Rubyでやりたい
153
154
  local Arrow = lgi.Arrow
154
155
 
155
156
  local input_class = Arrow.MemoryMappedInputStream
156
- local input = input_class.new("/tmp/pandas.arrow")
157
+ local input = input_class.new("/dev/shm/pandas.arrow")
157
158
  local reader = Arrow.RecordBatchFileReader.new(input)
158
159
  for i = 0, reader:get_n_record_batches() - 1 do
159
160
  local record_batch = reader:get_record_batch(i)
@@ -171,7 +172,7 @@ Rubyでやりたい
171
172
 
172
173
  df = data.frame(a=c(1, 2, 3),
173
174
  b=c(1.1, 2.2, 3.3))
174
- write_feather(df, "/tmp/dataframe.feather")
175
+ write_feather(df, "/dev/shm/dataframe.feather")
175
176
 
176
177
  = Feather:Ruby
177
178
 
@@ -180,7 +181,7 @@ Rubyでやりたい
180
181
  require "arrow"
181
182
 
182
183
  Input = Arrow::MemoryMappedInputStream
183
- Input.open("/tmp/dataframe.feather") do |input|
184
+ Input.open("/dev/shm/dataframe.feather") do |input|
184
185
  reader = Arrow::FeatherFileReader.new(input)
185
186
  reader.columns.each do |column|
186
187
  puts("#{column.name}: #{column.to_a.inspect}")
@@ -198,7 +199,7 @@ Rubyでやりたい
198
199
  df = pd.DataFrame({"a": [1, 2, 3],
199
200
  "b": ["hello", "world", "!"]})
200
201
  table = pa.Table.from_pandas(df)
201
- pq.write_table(table, "/tmp/pandas.parquet")
202
+ pq.write_table(table, "/dev/shm/pandas.parquet")
202
203
 
203
204
  = Parquet:Ruby
204
205
 
@@ -207,7 +208,7 @@ Rubyでやりたい
207
208
  require "arrow"
208
209
  require "parquet"
209
210
 
210
- path = "/tmp/pandas.parquet"
211
+ path = "/dev/shm/pandas.parquet"
211
212
  reader = Parquet::ArrowFileReader.new(path)
212
213
  table = reader.read_table
213
214
  table.each_column do |column|
@@ -237,14 +238,14 @@ Rubyでやりたい
237
238
  # rouge ruby
238
239
  # 空のテーブルにArrow形式のデータを読み込む
239
240
  logs = Groonga::Array.create(name: "logs")
240
- logs.load_arrow("/tmp/pandas.arrow")
241
+ logs.load_arrow("/dev/shm/pandas.arrow")
241
242
  logs.each {|record| p record.attributes}
242
243
  # フィルター
243
244
  filtered_logs = logs.select do |record|
244
245
  record.b =~ "hello" # "hello"で全文検索
245
246
  end
246
247
  # フィルター結果をArrow形式で書き込み
247
- filtered_logs.dump_arrow("/tmp/filtered.arrow",
248
+ filtered_logs.dump_arrow("/dev/shm/filtered.arrow",
248
249
  column_names: ["a", "b"])
249
250
 
250
251
  = Groonga:Python
@@ -253,7 +254,7 @@ Rubyでやりたい
253
254
  # Arrow形式のGroongaでのフィルター結果を読み込む
254
255
  import pyarrow as pa
255
256
 
256
- with pa.OSFile("/tmp/filtered.arrow") as source:
257
+ with pa.OSFile("/dev/shm/filtered.arrow") as source:
257
258
  writer = pa.RecordBatchFileReader(source)
258
259
  print(writer.get_record_batch(0).to_pandas())
259
260
 
@@ -294,7 +295,7 @@ Rubyでやりたい
294
295
  ndarray = np.random.randn(10, 6) # 10x6
295
296
  print(ndarray)
296
297
  tensor = pa.Tensor.from_numpy(ndarray)
297
- with pa.OSFile("/tmp/tensor.arrow", "wb") as sink:
298
+ with pa.OSFile("/dev/shm/tensor.arrow", "wb") as sink:
298
299
  pa.write_tensor(tensor, sink)
299
300
 
300
301
  = Tensor:Ruby
@@ -304,7 +305,7 @@ Rubyでやりたい
304
305
  require "arrow"
305
306
 
306
307
  Input = Arrow::MemoryMappedInputStream
307
- Input.open("/tmp/tensor.arrow") do |input|
308
+ Input.open("/dev/shm/tensor.arrow") do |input|
308
309
  tensor = input.read_tensor(0)
309
310
  p tensor.shape # => [10, 6]
310
311
  end
@@ -318,7 +319,7 @@ Rubyでやりたい
318
319
  require "pp"
319
320
 
320
321
  Input = Arrow::MemoryMappedInputStream
321
- Input.open("/tmp/tensor.arrow") do |input|
322
+ Input.open("/dev/shm/tensor.arrow") do |input|
322
323
  tensor = input.read_tensor(0)
323
324
  pp tensor.to_gsl
324
325
  # tensor.to_gsl.to_arrow == tensor
@@ -333,7 +334,7 @@ Rubyでやりたい
333
334
  require "pp"
334
335
 
335
336
  Input = Arrow::MemoryMappedInputStream
336
- Input.open("/tmp/tensor.arrow") do |input|
337
+ Input.open("/dev/shm/tensor.arrow") do |input|
337
338
  tensor = input.read_tensor(0)
338
339
  pp tensor.to_nmatrix
339
340
  # tensor.to_nmatrix.to_arrow == tensor
@@ -348,7 +349,7 @@ Rubyでやりたい
348
349
  require "pp"
349
350
 
350
351
  Input = Arrow::MemoryMappedInputStream
351
- Input.open("/tmp/tensor.arrow") do |input|
352
+ Input.open("/dev/shm/tensor.arrow") do |input|
352
353
  tensor = input.read_tensor(0)
353
354
  pp tensor.to_narray
354
355
  # tensor.to_narray.to_arrow == tensor
@@ -382,8 +383,8 @@ Rubyでやりたい
382
383
 
383
384
  * Rubyでデータ処理したいなぁ!\n
384
385
  の実現を目指すプロジェクト
385
- * URL:
386
- * https://github.io/red-data-tools
386
+ * リソース:
387
+ * GitHub: red-data-tools
387
388
  * https://red-data-tools.github.io
388
389
  * https://gitter.im/red-data-tools
389
390
 
@@ -457,7 +458,7 @@ Because we use Ruby!
457
458
 
458
459
  * Rubyでデータ処理したい人!
459
460
  * ポリシーに同意できる人!
460
- * URL:
461
- * https://github.io/red-data-tools
461
+ * リソース:
462
+ * GitHub: red-data-tools
462
463
  * https://red-data-tools.github.io
463
464
  * https://gitter.im/red-data-tools
@@ -0,0 +1,11 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require "arrow"
4
+ require "arrow-gsl"
5
+
6
+ require "pp"
7
+
8
+ Arrow::MemoryMappedInputStream.open("/tmp/tensor.arrow") do |input|
9
+ tensor = input.read_tensor(0)
10
+ pp tensor.to_gsl
11
+ end
@@ -0,0 +1,11 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require "arrow"
4
+ require "arrow-nmatrix"
5
+
6
+ require "pp"
7
+
8
+ Arrow::MemoryMappedInputStream.open("/tmp/tensor.arrow") do |input|
9
+ tensor = input.read_tensor(0)
10
+ pp tensor.to_nmatrix
11
+ end
@@ -0,0 +1,11 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require "arrow"
4
+ require "arrow-numo-narray"
5
+
6
+ require "pp"
7
+
8
+ Arrow::MemoryMappedInputStream.open("/tmp/tensor.arrow") do |input|
9
+ tensor = input.read_tensor(0)
10
+ pp tensor.to_narray
11
+ end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rabbit-slide-kou-data-science-rb
3
3
  version: !ruby/object:Gem::Version
4
- version: 2017.5.19.2
4
+ version: 2017.5.19.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Kouhei Sutou
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2017-05-19 00:00:00.000000000 Z
11
+ date: 2017-05-20 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rabbit
@@ -59,6 +59,9 @@ files:
59
59
  - sample/read-pandas.lua
60
60
  - sample/read-pandas.rb
61
61
  - sample/read-parquet.rb
62
+ - sample/read-tensor-gsl.rb
63
+ - sample/read-tensor-nmatrix.rb
64
+ - sample/read-tensor-numo-narray.rb
62
65
  - sample/read-tensor.rb
63
66
  - sample/write-feather.R
64
67
  - sample/write-pandas.py