athena-udf 0.1.1 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 1ddf95a3316e3f1c9f3a6b21d067c4412aef09cc34982a5be141428c64974674
4
- data.tar.gz: a9a189b6a6a1d2651f96a198f75829b22cd1289bfd41f3a2543ec408531ae064
3
+ metadata.gz: ab79db7e49eadd3d4448779185ecfc592f497e0a81209964ae7c2b8e4fdbeed5
4
+ data.tar.gz: 3c2b577e2506f67b9c0e455d8e3988b844fb5762dba69fa91c508d75d5de18fe
5
5
  SHA512:
6
- metadata.gz: 83ce24ec41d03b9cf909c1a1f7b34cf952a79af775eabc00b2769cb6f862fd44c255a74961b9a99207f8581789b05f8202a3b09fb7b03f3785dde385c90f78d1
7
- data.tar.gz: 16a47b3b3504e204c65bd71590a8c63618ad5fd25f0453504874f91ba1aed92624a77b3e660ece6815eb31d096cfab6fcfc680e2bbd6ff5943e235a2c24c838b
6
+ metadata.gz: 03b34a9703a30047eaeaf03c2bf8b975f190bf1f7ad08c30f1f06f8dcf2b6a7eca90b29621437699ce9acd22579bc44f59d03941493fb27adcb173caa7488f7b
7
+ data.tar.gz: c9dc25db956961249e40e4909ec284b5234622c05933c0fa78347431344afd5e5ca09cc6c36fa6b480ff2b5e4893d095e94369e6a07fc72917d8df279d9791f8
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- athena-udf (0.1.0)
4
+ athena-udf (0.1.3)
5
5
  base64
6
6
  csv
7
7
  red-arrow (~> 12.0.1)
@@ -3,13 +3,13 @@
3
3
  module AthenaUDF
4
4
  module Utils
5
5
  def read_record_batches(schema_data, record_batch_data)
6
- Tempfile.create do |in_f|
7
- in_f.write(schema_data)
8
- in_f.write(record_batch_data)
9
- in_f.flush
6
+ buffer = Arrow::ResizableBuffer.new(schema_data.bytes.size + record_batch_data.bytes.size)
7
+ Arrow::BufferOutputStream.open(buffer) do |output|
8
+ output.write(schema_data)
9
+ output.write(record_batch_data)
10
10
 
11
- Arrow::MemoryMappedInputStream.open(in_f.path) do |inp|
12
- reader = Arrow::RecordBatchStreamReader.new(inp)
11
+ Arrow::BufferInputStream.open(buffer) do |input|
12
+ reader = Arrow::RecordBatchStreamReader.new(input)
13
13
  input_schema = reader.schema
14
14
  reader.each do |record_batch|
15
15
  yield input_schema, record_batch
@@ -19,61 +19,55 @@ module AthenaUDF
19
19
  end
20
20
 
21
21
  def read_schema(schema_data)
22
- # schema_buf = Arrow::Buffer.try_convert(schema_data)
23
- Tempfile.create do |f|
24
- f.write(schema_data)
25
- f.flush
22
+ buffer = Arrow::ResizableBuffer.new(schema_data.bytes.size)
23
+ Arrow::BufferOutputStream.open(buffer) do |output|
24
+ output.write(schema_data)
26
25
 
27
- Arrow::MemoryMappedInputStream.open(f.path) do |inp|
28
- reader = Arrow::RecordBatchStreamReader.new(inp)
26
+ Arrow::BufferInputStream.open(buffer) do |input|
27
+ reader = Arrow::RecordBatchStreamReader.new(input)
29
28
  reader.schema
30
29
  end
31
30
  end
32
31
  end
33
32
 
34
33
  def get_schema_bytes(schema, record_batch)
35
- Tempfile.create do |f|
36
- Arrow::FileOutputStream.open(f.path, false) do |oup|
37
- Arrow::RecordBatchStreamWriter.open(oup, schema) do |writer|
38
- writer.write_record_batch(record_batch)
39
- end
34
+ buffer = Arrow::ResizableBuffer.new(0)
35
+ Arrow::BufferOutputStream.open(buffer) do |output|
36
+ Arrow::RecordBatchStreamWriter.open(output, schema) do |writer|
37
+ writer.write_record_batch(record_batch)
40
38
  end
41
- f.flush
42
39
 
43
- data = File.binread(f.path)
44
- start_index = get_record_batch_index(data)
45
- data.bytes[4..start_index - 5].pack('C*')
40
+ bytes = buffer.data.to_s
41
+ start_index = get_record_batch_index(bytes)
42
+ bytes[4..start_index - 5]
46
43
  end
47
44
  end
48
45
 
49
46
  def get_record_batch_bytes(schema, record_batch)
50
- Tempfile.create do |f|
51
- Arrow::FileOutputStream.open(f.path, false) do |oup|
52
- Arrow::RecordBatchStreamWriter.open(oup, schema) do |writer|
53
- writer.write_record_batch(record_batch)
54
- end
47
+ buffer = Arrow::ResizableBuffer.new(0)
48
+ Arrow::BufferOutputStream.open(buffer) do |output|
49
+ Arrow::RecordBatchStreamWriter.open(output, schema) do |writer|
50
+ writer.write_record_batch(record_batch)
55
51
  end
56
- f.flush
57
52
 
58
- data = File.binread(f.path)
59
- start_index = get_record_batch_index(data)
60
- data.bytes[start_index..].pack('C*')
53
+ bytes = buffer.data.to_s
54
+ start_index = get_record_batch_index(bytes)
55
+ bytes[start_index..]
61
56
  end
62
57
  end
63
58
 
64
- def get_record_batch_index(raw)
65
- size = raw.bytes.size
59
+ def get_record_batch_index(bytes)
60
+ size = bytes.size
66
61
  found_count = 0
67
62
  start_index = 0
68
63
  0.upto(size - 4).each do |i|
69
- has_ffff = 4.times.all? do |n|
70
- raw.bytes[i + n] == 255
71
- end
64
+ has_ffff = bytes.slice(i, 4) == "\xFF\xFF\xFF\xFF".b
65
+
72
66
  found_count += 1 if has_ffff
73
- if found_count == 2
74
- start_index = i + 4
75
- break
76
- end
67
+ next unless found_count == 2
68
+
69
+ start_index = i + 4
70
+ break
77
71
  end
78
72
 
79
73
  start_index
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module AthenaUDF
4
- VERSION = '0.1.1'
4
+ VERSION = '0.1.3'
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: athena-udf
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.1
4
+ version: 0.1.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Daisuke Taniwaki
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2024-08-27 00:00:00.000000000 Z
11
+ date: 2024-09-03 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: base64