RubyGems - rabbit-slide-kou-data-science-rb - Versions diffs - 2017.5.19.2 → 2017.5.19.3 - Mend

rabbit-slide-kou-data-science-rb 2017.5.19.2 → 2017.5.19.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8)

checksums.yaml +4 -4
data/config.yaml +1 -1
data/pdf/data-science-rb-ruby-with-apache-arrow-joins-data-processing-languages.pdf +0 -0
data/ruby-with-apache-arrow-joins-data-processing-languages.rab +20 -19
data/sample/read-tensor-gsl.rb +11 -0
data/sample/read-tensor-nmatrix.rb +11 -0
data/sample/read-tensor-numo-narray.rb +11 -0
metadata +5 -2

checksums.yaml CHANGED

@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: 8695ae18768ed7c379be63ca652892e35e6f375b
-  data.tar.gz: 18f3b2b84d73480199907dd6c6f7f07c96e175ff
+  metadata.gz: 43bab7a71fb5de0ab27f1b82377c5164aaa519ca
+  data.tar.gz: 5c7b4e2166efc143d8e43f2739957f6cfeea232d
 SHA512:
-  metadata.gz: b2cad6aec92ce83e33c0db57466363103da2c8447e99520af768f8013153817465de9066a365d1b6f91998852b9621a71f62a8f7d3ea00584ecf20cd3a722e66
-  data.tar.gz: 4f48ab9fd025c9a79633a2fea320e177a45a5fe84641e9da09285225076787be3a592e50c332e4f698a1fa23be8d41f61fbd0fb7c5496f20bda9ec7e9118abe6
+  metadata.gz: 69473a1db8461ad44afe52498c83590269a903d8144b33f3a0ab07ac939b04d22fcfc8cc523101d11e3d27a1a835716a187d02703060f27767acc540f989946f
+  data.tar.gz: 39eefdc301d3f32ec74f55e22a33f3970d7346e93baab62be512f349ecd593fe89818565611d3c6e615c0b586c5cd90e89441e9a78031f328e94521f32c3e07c

data/config.yaml CHANGED

@@ -7,7 +7,7 @@ tags:
 - arrow
 - data
 presentation_date: 2017-05-19
-version: 2017.5.19.2
+version: 2017.5.19.3
 licenses:
 - CC-BY-SA-4.0
 slideshare_id: datasciencerb

data/pdf/data-science-rb-ruby-with-apache-arrow-joins-data-processing-languages.pdf CHANGED

Binary file

data/ruby-with-apache-arrow-joins-data-processing-languages.rab CHANGED

@@ -117,13 +117,14 @@ Rubyでやりたい
   # rouge python
   # pandasでデータ生成→Arrow形式で書き込み
+  import pandas as pd
   import pyarrow as pa
   df = pd.DataFrame({"a": [1, 2, 3],
                      "b": ["hello", "world", "!"]})
   record_batch = pa.RecordBatch.from_pandas(df)
-  with pa.OSFile("/tmp/pandas.arrow", "wb") as sink:
+  with pa.OSFile("/dev/shm/pandas.arrow", "wb") as sink:
       schema = record_batch.schema
       writer = pa.RecordBatchFileWriter(sink, schema)
       writer.write_batch(record_batch)
@@ -136,7 +137,7 @@ Rubyでやりたい
   require "arrow"
   Input = Arrow::MemoryMappedInputStream
-  Input.open("/tmp/pandas.arrow") do |input|
+  Input.open("/dev/shm/pandas.arrow") do |input|
     reader = Arrow::RecordBatchFileReader.new(input)
     reader.each do |record_batch|
       puts("=" * 40)
@@ -153,7 +154,7 @@ Rubyでやりたい
   local Arrow = lgi.Arrow
   local input_class = Arrow.MemoryMappedInputStream
-  local input = input_class.new("/tmp/pandas.arrow")
+  local input = input_class.new("/dev/shm/pandas.arrow")
   local reader = Arrow.RecordBatchFileReader.new(input)
   for i = 0, reader:get_n_record_batches() - 1 do
      local record_batch = reader:get_record_batch(i)
@@ -171,7 +172,7 @@ Rubyでやりたい
   df = data.frame(a=c(1, 2, 3),
                   b=c(1.1, 2.2, 3.3))
-  write_feather(df, "/tmp/dataframe.feather")
+  write_feather(df, "/dev/shm/dataframe.feather")
 = Feather：Ruby
@@ -180,7 +181,7 @@ Rubyでやりたい
   require "arrow"
   Input = Arrow::MemoryMappedInputStream
-  Input.open("/tmp/dataframe.feather") do |input|
+  Input.open("/dev/shm/dataframe.feather") do |input|
     reader = Arrow::FeatherFileReader.new(input)
     reader.columns.each do |column|
       puts("#{column.name}: #{column.to_a.inspect}")
@@ -198,7 +199,7 @@ Rubyでやりたい
   df = pd.DataFrame({"a": [1, 2, 3],
                      "b": ["hello", "world", "!"]})
   table = pa.Table.from_pandas(df)
-  pq.write_table(table, "/tmp/pandas.parquet")
+  pq.write_table(table, "/dev/shm/pandas.parquet")
 = Parquet：Ruby
@@ -207,7 +208,7 @@ Rubyでやりたい
   require "arrow"
   require "parquet"
-  path = "/tmp/pandas.parquet"
+  path = "/dev/shm/pandas.parquet"
   reader = Parquet::ArrowFileReader.new(path)
   table = reader.read_table
   table.each_column do |column|
@@ -237,14 +238,14 @@ Rubyでやりたい
   # rouge ruby
   # 空のテーブルにArrow形式のデータを読み込む
   logs = Groonga::Array.create(name: "logs")
-  logs.load_arrow("/tmp/pandas.arrow")
+  logs.load_arrow("/dev/shm/pandas.arrow")
   logs.each {|record| p record.attributes}
   # フィルター
   filtered_logs = logs.select do |record|
     record.b =~ "hello" # "hello"で全文検索
   end
   # フィルター結果をArrow形式で書き込み
-  filtered_logs.dump_arrow("/tmp/filtered.arrow",
+  filtered_logs.dump_arrow("/dev/shm/filtered.arrow",
                            column_names: ["a", "b"])
 = Groonga：Python
@@ -253,7 +254,7 @@ Rubyでやりたい
   # Arrow形式のGroongaでのフィルター結果を読み込む
   import pyarrow as pa
-  with pa.OSFile("/tmp/filtered.arrow") as source:
+  with pa.OSFile("/dev/shm/filtered.arrow") as source:
       writer = pa.RecordBatchFileReader(source)
       print(writer.get_record_batch(0).to_pandas())
@@ -294,7 +295,7 @@ Rubyでやりたい
   ndarray = np.random.randn(10, 6) # 10x6
   print(ndarray)
   tensor = pa.Tensor.from_numpy(ndarray)
-  with pa.OSFile("/tmp/tensor.arrow", "wb") as sink:
+  with pa.OSFile("/dev/shm/tensor.arrow", "wb") as sink:
       pa.write_tensor(tensor, sink)
 = Tensor：Ruby
@@ -304,7 +305,7 @@ Rubyでやりたい
   require "arrow"
   Input = Arrow::MemoryMappedInputStream
-  Input.open("/tmp/tensor.arrow") do |input|
+  Input.open("/dev/shm/tensor.arrow") do |input|
     tensor = input.read_tensor(0)
     p tensor.shape # => [10, 6]
   end
@@ -318,7 +319,7 @@ Rubyでやりたい
   require "pp"
   Input = Arrow::MemoryMappedInputStream
-  Input.open("/tmp/tensor.arrow") do |input|
+  Input.open("/dev/shm/tensor.arrow") do |input|
     tensor = input.read_tensor(0)
     pp tensor.to_gsl
     # tensor.to_gsl.to_arrow == tensor
@@ -333,7 +334,7 @@ Rubyでやりたい
   require "pp"
   Input = Arrow::MemoryMappedInputStream
-  Input.open("/tmp/tensor.arrow") do |input|
+  Input.open("/dev/shm/tensor.arrow") do |input|
     tensor = input.read_tensor(0)
     pp tensor.to_nmatrix
     # tensor.to_nmatrix.to_arrow == tensor
@@ -348,7 +349,7 @@ Rubyでやりたい
   require "pp"
   Input = Arrow::MemoryMappedInputStream
-  Input.open("/tmp/tensor.arrow") do |input|
+  Input.open("/dev/shm/tensor.arrow") do |input|
     tensor = input.read_tensor(0)
     pp tensor.to_narray
     # tensor.to_narray.to_arrow == tensor
@@ -382,8 +383,8 @@ Rubyでやりたい
   * Rubyでデータ処理したいなぁ！\n
     の実現を目指すプロジェクト
-  * URL：
-    * https://github.io/red-data-tools
+  * リソース：
+    * GitHub: red-data-tools
     * https://red-data-tools.github.io
     * https://gitter.im/red-data-tools
@@ -457,7 +458,7 @@ Because we use Ruby!
   * Rubyでデータ処理したい人！
   * ポリシーに同意できる人！
-  * URL：
-    * https://github.io/red-data-tools
+  * リソース：
+    * GitHub: red-data-tools
     * https://red-data-tools.github.io
     * https://gitter.im/red-data-tools

data/sample/read-tensor-gsl.rb ADDED

@@ -0,0 +1,11 @@
+#!/usr/bin/env ruby
+require "arrow"
+require "arrow-gsl"
+require "pp"
+Arrow::MemoryMappedInputStream.open("/tmp/tensor.arrow") do |input|
+  tensor = input.read_tensor(0)
+  pp tensor.to_gsl
+end

data/sample/read-tensor-nmatrix.rb ADDED

@@ -0,0 +1,11 @@
+#!/usr/bin/env ruby
+require "arrow"
+require "arrow-nmatrix"
+require "pp"
+Arrow::MemoryMappedInputStream.open("/tmp/tensor.arrow") do |input|
+  tensor = input.read_tensor(0)
+  pp tensor.to_nmatrix
+end

data/sample/read-tensor-numo-narray.rb ADDED

@@ -0,0 +1,11 @@
+#!/usr/bin/env ruby
+require "arrow"
+require "arrow-numo-narray"
+require "pp"
+Arrow::MemoryMappedInputStream.open("/tmp/tensor.arrow") do |input|
+  tensor = input.read_tensor(0)
+  pp tensor.to_narray
+end

metadata CHANGED

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: rabbit-slide-kou-data-science-rb
 version: !ruby/object:Gem::Version
-  version: 2017.5.19.2
+  version: 2017.5.19.3
 platform: ruby
 authors:
 - Kouhei Sutou
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2017-05-19 00:00:00.000000000 Z
+date: 2017-05-20 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: rabbit
@@ -59,6 +59,9 @@ files:
 - sample/read-pandas.lua
 - sample/read-pandas.rb
 - sample/read-parquet.rb
+- sample/read-tensor-gsl.rb
+- sample/read-tensor-nmatrix.rb
+- sample/read-tensor-numo-narray.rb
 - sample/read-tensor.rb
 - sample/write-feather.R
 - sample/write-pandas.py