athena-udf 0.1.1 → 0.1.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile.lock +1 -1
- data/lib/athena-udf/utils.rb +33 -39
- data/lib/athena-udf/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: ab79db7e49eadd3d4448779185ecfc592f497e0a81209964ae7c2b8e4fdbeed5
|
4
|
+
data.tar.gz: 3c2b577e2506f67b9c0e455d8e3988b844fb5762dba69fa91c508d75d5de18fe
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 03b34a9703a30047eaeaf03c2bf8b975f190bf1f7ad08c30f1f06f8dcf2b6a7eca90b29621437699ce9acd22579bc44f59d03941493fb27adcb173caa7488f7b
|
7
|
+
data.tar.gz: c9dc25db956961249e40e4909ec284b5234622c05933c0fa78347431344afd5e5ca09cc6c36fa6b480ff2b5e4893d095e94369e6a07fc72917d8df279d9791f8
|
data/Gemfile.lock
CHANGED
data/lib/athena-udf/utils.rb
CHANGED
@@ -3,13 +3,13 @@
|
|
3
3
|
module AthenaUDF
|
4
4
|
module Utils
|
5
5
|
def read_record_batches(schema_data, record_batch_data)
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
6
|
+
buffer = Arrow::ResizableBuffer.new(schema_data.bytes.size + record_batch_data.bytes.size)
|
7
|
+
Arrow::BufferOutputStream.open(buffer) do |output|
|
8
|
+
output.write(schema_data)
|
9
|
+
output.write(record_batch_data)
|
10
10
|
|
11
|
-
Arrow::
|
12
|
-
reader = Arrow::RecordBatchStreamReader.new(
|
11
|
+
Arrow::BufferInputStream.open(buffer) do |input|
|
12
|
+
reader = Arrow::RecordBatchStreamReader.new(input)
|
13
13
|
input_schema = reader.schema
|
14
14
|
reader.each do |record_batch|
|
15
15
|
yield input_schema, record_batch
|
@@ -19,61 +19,55 @@ module AthenaUDF
|
|
19
19
|
end
|
20
20
|
|
21
21
|
def read_schema(schema_data)
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
f.flush
|
22
|
+
buffer = Arrow::ResizableBuffer.new(schema_data.bytes.size)
|
23
|
+
Arrow::BufferOutputStream.open(buffer) do |output|
|
24
|
+
output.write(schema_data)
|
26
25
|
|
27
|
-
Arrow::
|
28
|
-
reader = Arrow::RecordBatchStreamReader.new(
|
26
|
+
Arrow::BufferInputStream.open(buffer) do |input|
|
27
|
+
reader = Arrow::RecordBatchStreamReader.new(input)
|
29
28
|
reader.schema
|
30
29
|
end
|
31
30
|
end
|
32
31
|
end
|
33
32
|
|
34
33
|
def get_schema_bytes(schema, record_batch)
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
end
|
34
|
+
buffer = Arrow::ResizableBuffer.new(0)
|
35
|
+
Arrow::BufferOutputStream.open(buffer) do |output|
|
36
|
+
Arrow::RecordBatchStreamWriter.open(output, schema) do |writer|
|
37
|
+
writer.write_record_batch(record_batch)
|
40
38
|
end
|
41
|
-
f.flush
|
42
39
|
|
43
|
-
|
44
|
-
start_index = get_record_batch_index(
|
45
|
-
|
40
|
+
bytes = buffer.data.to_s
|
41
|
+
start_index = get_record_batch_index(bytes)
|
42
|
+
bytes[4..start_index - 5]
|
46
43
|
end
|
47
44
|
end
|
48
45
|
|
49
46
|
def get_record_batch_bytes(schema, record_batch)
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
end
|
47
|
+
buffer = Arrow::ResizableBuffer.new(0)
|
48
|
+
Arrow::BufferOutputStream.open(buffer) do |output|
|
49
|
+
Arrow::RecordBatchStreamWriter.open(output, schema) do |writer|
|
50
|
+
writer.write_record_batch(record_batch)
|
55
51
|
end
|
56
|
-
f.flush
|
57
52
|
|
58
|
-
|
59
|
-
start_index = get_record_batch_index(
|
60
|
-
|
53
|
+
bytes = buffer.data.to_s
|
54
|
+
start_index = get_record_batch_index(bytes)
|
55
|
+
bytes[start_index..]
|
61
56
|
end
|
62
57
|
end
|
63
58
|
|
64
|
-
def get_record_batch_index(
|
65
|
-
size =
|
59
|
+
def get_record_batch_index(bytes)
|
60
|
+
size = bytes.size
|
66
61
|
found_count = 0
|
67
62
|
start_index = 0
|
68
63
|
0.upto(size - 4).each do |i|
|
69
|
-
has_ffff =
|
70
|
-
|
71
|
-
end
|
64
|
+
has_ffff = bytes.slice(i, 4) == "\xFF\xFF\xFF\xFF".b
|
65
|
+
|
72
66
|
found_count += 1 if has_ffff
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
67
|
+
next unless found_count == 2
|
68
|
+
|
69
|
+
start_index = i + 4
|
70
|
+
break
|
77
71
|
end
|
78
72
|
|
79
73
|
start_index
|
data/lib/athena-udf/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: athena-udf
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.1
|
4
|
+
version: 0.1.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Daisuke Taniwaki
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-
|
11
|
+
date: 2024-09-03 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: base64
|