athena-udf 0.1.1 → 0.1.3

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 1ddf95a3316e3f1c9f3a6b21d067c4412aef09cc34982a5be141428c64974674
4
- data.tar.gz: a9a189b6a6a1d2651f96a198f75829b22cd1289bfd41f3a2543ec408531ae064
3
+ metadata.gz: ab79db7e49eadd3d4448779185ecfc592f497e0a81209964ae7c2b8e4fdbeed5
4
+ data.tar.gz: 3c2b577e2506f67b9c0e455d8e3988b844fb5762dba69fa91c508d75d5de18fe
5
5
  SHA512:
6
- metadata.gz: 83ce24ec41d03b9cf909c1a1f7b34cf952a79af775eabc00b2769cb6f862fd44c255a74961b9a99207f8581789b05f8202a3b09fb7b03f3785dde385c90f78d1
7
- data.tar.gz: 16a47b3b3504e204c65bd71590a8c63618ad5fd25f0453504874f91ba1aed92624a77b3e660ece6815eb31d096cfab6fcfc680e2bbd6ff5943e235a2c24c838b
6
+ metadata.gz: 03b34a9703a30047eaeaf03c2bf8b975f190bf1f7ad08c30f1f06f8dcf2b6a7eca90b29621437699ce9acd22579bc44f59d03941493fb27adcb173caa7488f7b
7
+ data.tar.gz: c9dc25db956961249e40e4909ec284b5234622c05933c0fa78347431344afd5e5ca09cc6c36fa6b480ff2b5e4893d095e94369e6a07fc72917d8df279d9791f8
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- athena-udf (0.1.0)
4
+ athena-udf (0.1.3)
5
5
  base64
6
6
  csv
7
7
  red-arrow (~> 12.0.1)
@@ -3,13 +3,13 @@
3
3
  module AthenaUDF
4
4
  module Utils
5
5
  def read_record_batches(schema_data, record_batch_data)
6
- Tempfile.create do |in_f|
7
- in_f.write(schema_data)
8
- in_f.write(record_batch_data)
9
- in_f.flush
6
+ buffer = Arrow::ResizableBuffer.new(schema_data.bytes.size + record_batch_data.bytes.size)
7
+ Arrow::BufferOutputStream.open(buffer) do |output|
8
+ output.write(schema_data)
9
+ output.write(record_batch_data)
10
10
 
11
- Arrow::MemoryMappedInputStream.open(in_f.path) do |inp|
12
- reader = Arrow::RecordBatchStreamReader.new(inp)
11
+ Arrow::BufferInputStream.open(buffer) do |input|
12
+ reader = Arrow::RecordBatchStreamReader.new(input)
13
13
  input_schema = reader.schema
14
14
  reader.each do |record_batch|
15
15
  yield input_schema, record_batch
@@ -19,61 +19,55 @@ module AthenaUDF
19
19
  end
20
20
 
21
21
  def read_schema(schema_data)
22
- # schema_buf = Arrow::Buffer.try_convert(schema_data)
23
- Tempfile.create do |f|
24
- f.write(schema_data)
25
- f.flush
22
+ buffer = Arrow::ResizableBuffer.new(schema_data.bytes.size)
23
+ Arrow::BufferOutputStream.open(buffer) do |output|
24
+ output.write(schema_data)
26
25
 
27
- Arrow::MemoryMappedInputStream.open(f.path) do |inp|
28
- reader = Arrow::RecordBatchStreamReader.new(inp)
26
+ Arrow::BufferInputStream.open(buffer) do |input|
27
+ reader = Arrow::RecordBatchStreamReader.new(input)
29
28
  reader.schema
30
29
  end
31
30
  end
32
31
  end
33
32
 
34
33
  def get_schema_bytes(schema, record_batch)
35
- Tempfile.create do |f|
36
- Arrow::FileOutputStream.open(f.path, false) do |oup|
37
- Arrow::RecordBatchStreamWriter.open(oup, schema) do |writer|
38
- writer.write_record_batch(record_batch)
39
- end
34
+ buffer = Arrow::ResizableBuffer.new(0)
35
+ Arrow::BufferOutputStream.open(buffer) do |output|
36
+ Arrow::RecordBatchStreamWriter.open(output, schema) do |writer|
37
+ writer.write_record_batch(record_batch)
40
38
  end
41
- f.flush
42
39
 
43
- data = File.binread(f.path)
44
- start_index = get_record_batch_index(data)
45
- data.bytes[4..start_index - 5].pack('C*')
40
+ bytes = buffer.data.to_s
41
+ start_index = get_record_batch_index(bytes)
42
+ bytes[4..start_index - 5]
46
43
  end
47
44
  end
48
45
 
49
46
  def get_record_batch_bytes(schema, record_batch)
50
- Tempfile.create do |f|
51
- Arrow::FileOutputStream.open(f.path, false) do |oup|
52
- Arrow::RecordBatchStreamWriter.open(oup, schema) do |writer|
53
- writer.write_record_batch(record_batch)
54
- end
47
+ buffer = Arrow::ResizableBuffer.new(0)
48
+ Arrow::BufferOutputStream.open(buffer) do |output|
49
+ Arrow::RecordBatchStreamWriter.open(output, schema) do |writer|
50
+ writer.write_record_batch(record_batch)
55
51
  end
56
- f.flush
57
52
 
58
- data = File.binread(f.path)
59
- start_index = get_record_batch_index(data)
60
- data.bytes[start_index..].pack('C*')
53
+ bytes = buffer.data.to_s
54
+ start_index = get_record_batch_index(bytes)
55
+ bytes[start_index..]
61
56
  end
62
57
  end
63
58
 
64
- def get_record_batch_index(raw)
65
- size = raw.bytes.size
59
+ def get_record_batch_index(bytes)
60
+ size = bytes.size
66
61
  found_count = 0
67
62
  start_index = 0
68
63
  0.upto(size - 4).each do |i|
69
- has_ffff = 4.times.all? do |n|
70
- raw.bytes[i + n] == 255
71
- end
64
+ has_ffff = bytes.slice(i, 4) == "\xFF\xFF\xFF\xFF".b
65
+
72
66
  found_count += 1 if has_ffff
73
- if found_count == 2
74
- start_index = i + 4
75
- break
76
- end
67
+ next unless found_count == 2
68
+
69
+ start_index = i + 4
70
+ break
77
71
  end
78
72
 
79
73
  start_index
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module AthenaUDF
4
- VERSION = '0.1.1'
4
+ VERSION = '0.1.3'
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: athena-udf
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.1
4
+ version: 0.1.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Daisuke Taniwaki
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2024-08-27 00:00:00.000000000 Z
11
+ date: 2024-09-03 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: base64