athena-udf 0.1.1 → 0.1.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 1ddf95a3316e3f1c9f3a6b21d067c4412aef09cc34982a5be141428c64974674
4
- data.tar.gz: a9a189b6a6a1d2651f96a198f75829b22cd1289bfd41f3a2543ec408531ae064
3
+ metadata.gz: 1fca59aa26a814ceb98c86febbff75b38a5de2cb888c7291c483ed49aafe2301
4
+ data.tar.gz: 1a1020ab4893a1689771c5a35049eeca8ec778d7e452de5de660790c8394f600
5
5
  SHA512:
6
- metadata.gz: 83ce24ec41d03b9cf909c1a1f7b34cf952a79af775eabc00b2769cb6f862fd44c255a74961b9a99207f8581789b05f8202a3b09fb7b03f3785dde385c90f78d1
7
- data.tar.gz: 16a47b3b3504e204c65bd71590a8c63618ad5fd25f0453504874f91ba1aed92624a77b3e660ece6815eb31d096cfab6fcfc680e2bbd6ff5943e235a2c24c838b
6
+ metadata.gz: 9f2f349f806c505d4957674ac6c18bf326b8c2cae08d259d30501d550b4f5fa8f73f5e79c5ec5eecab8da250abc91a3bb4bf0cddcc3c04e7288a2df2df79f76a
7
+ data.tar.gz: efe4807a051325233719ffa97aeec66564ad5c02a2703f70ebccb725a464f0be7d8142be42fae0073706f8b56edddf94a58fa7e15bbb3734c0bd2242a9f14475
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- athena-udf (0.1.0)
4
+ athena-udf (0.1.2)
5
5
  base64
6
6
  csv
7
7
  red-arrow (~> 12.0.1)
@@ -3,13 +3,13 @@
3
3
  module AthenaUDF
4
4
  module Utils
5
5
  def read_record_batches(schema_data, record_batch_data)
6
- Tempfile.create do |in_f|
7
- in_f.write(schema_data)
8
- in_f.write(record_batch_data)
9
- in_f.flush
6
+ buffer = Arrow::ResizableBuffer.new(schema_data.bytes.size + record_batch_data.bytes.size)
7
+ Arrow::BufferOutputStream.open(buffer) do |output|
8
+ output.write(schema_data)
9
+ output.write(record_batch_data)
10
10
 
11
- Arrow::MemoryMappedInputStream.open(in_f.path) do |inp|
12
- reader = Arrow::RecordBatchStreamReader.new(inp)
11
+ Arrow::BufferInputStream.open(buffer) do |input|
12
+ reader = Arrow::RecordBatchStreamReader.new(input)
13
13
  input_schema = reader.schema
14
14
  reader.each do |record_batch|
15
15
  yield input_schema, record_batch
@@ -19,56 +19,49 @@ module AthenaUDF
19
19
  end
20
20
 
21
21
  def read_schema(schema_data)
22
- # schema_buf = Arrow::Buffer.try_convert(schema_data)
23
- Tempfile.create do |f|
24
- f.write(schema_data)
25
- f.flush
22
+ buffer = Arrow::ResizableBuffer.new(schema_data.bytes.size)
23
+ Arrow::BufferOutputStream.open(buffer) do |output|
24
+ output.write(schema_data)
26
25
 
27
- Arrow::MemoryMappedInputStream.open(f.path) do |inp|
28
- reader = Arrow::RecordBatchStreamReader.new(inp)
26
+ Arrow::BufferInputStream.open(buffer) do |input|
27
+ reader = Arrow::RecordBatchStreamReader.new(input)
29
28
  reader.schema
30
29
  end
31
30
  end
32
31
  end
33
32
 
34
33
  def get_schema_bytes(schema, record_batch)
35
- Tempfile.create do |f|
36
- Arrow::FileOutputStream.open(f.path, false) do |oup|
37
- Arrow::RecordBatchStreamWriter.open(oup, schema) do |writer|
38
- writer.write_record_batch(record_batch)
39
- end
34
+ buffer = Arrow::ResizableBuffer.new(0)
35
+ Arrow::BufferOutputStream.open(buffer) do |output|
36
+ Arrow::RecordBatchStreamWriter.open(output, schema) do |writer|
37
+ writer.write_record_batch(record_batch)
40
38
  end
41
- f.flush
42
39
 
43
- data = File.binread(f.path)
44
- start_index = get_record_batch_index(data)
45
- data.bytes[4..start_index - 5].pack('C*')
40
+ bytes = buffer.data.to_s
41
+ start_index = get_record_batch_index(bytes)
42
+ bytes[4..start_index - 5]
46
43
  end
47
44
  end
48
45
 
49
46
  def get_record_batch_bytes(schema, record_batch)
50
- Tempfile.create do |f|
51
- Arrow::FileOutputStream.open(f.path, false) do |oup|
52
- Arrow::RecordBatchStreamWriter.open(oup, schema) do |writer|
53
- writer.write_record_batch(record_batch)
54
- end
47
+ buffer = Arrow::ResizableBuffer.new(0)
48
+ Arrow::BufferOutputStream.open(buffer) do |output|
49
+ Arrow::RecordBatchStreamWriter.open(output, schema) do |writer|
50
+ writer.write_record_batch(record_batch)
55
51
  end
56
- f.flush
57
52
 
58
- data = File.binread(f.path)
59
- start_index = get_record_batch_index(data)
60
- data.bytes[start_index..].pack('C*')
53
+ bytes = buffer.data.to_s
54
+ start_index = get_record_batch_index(bytes)
55
+ bytes[start_index..]
61
56
  end
62
57
  end
63
58
 
64
- def get_record_batch_index(raw)
65
- size = raw.bytes.size
59
+ def get_record_batch_index(bytes)
60
+ size = bytes.size
66
61
  found_count = 0
67
62
  start_index = 0
68
63
  0.upto(size - 4).each do |i|
69
- has_ffff = 4.times.all? do |n|
70
- raw.bytes[i + n] == 255
71
- end
64
+ has_ffff = bytes.slice(i, 4) == [255, 255, 255, 255]
72
65
  found_count += 1 if has_ffff
73
66
  if found_count == 2
74
67
  start_index = i + 4
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module AthenaUDF
4
- VERSION = '0.1.1'
4
+ VERSION = '0.1.2'
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: athena-udf
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.1
4
+ version: 0.1.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Daisuke Taniwaki
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2024-08-27 00:00:00.000000000 Z
11
+ date: 2024-09-03 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: base64