athena-udf 0.1.1 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +1 -1
- data/lib/athena-udf/utils.rb +33 -39
- data/lib/athena-udf/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: ab79db7e49eadd3d4448779185ecfc592f497e0a81209964ae7c2b8e4fdbeed5
|
4
|
+
data.tar.gz: 3c2b577e2506f67b9c0e455d8e3988b844fb5762dba69fa91c508d75d5de18fe
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 03b34a9703a30047eaeaf03c2bf8b975f190bf1f7ad08c30f1f06f8dcf2b6a7eca90b29621437699ce9acd22579bc44f59d03941493fb27adcb173caa7488f7b
|
7
|
+
data.tar.gz: c9dc25db956961249e40e4909ec284b5234622c05933c0fa78347431344afd5e5ca09cc6c36fa6b480ff2b5e4893d095e94369e6a07fc72917d8df279d9791f8
|
data/Gemfile.lock
CHANGED
data/lib/athena-udf/utils.rb
CHANGED
@@ -3,13 +3,13 @@
|
|
3
3
|
module AthenaUDF
|
4
4
|
module Utils
|
5
5
|
def read_record_batches(schema_data, record_batch_data)
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
6
|
+
buffer = Arrow::ResizableBuffer.new(schema_data.bytes.size + record_batch_data.bytes.size)
|
7
|
+
Arrow::BufferOutputStream.open(buffer) do |output|
|
8
|
+
output.write(schema_data)
|
9
|
+
output.write(record_batch_data)
|
10
10
|
|
11
|
-
Arrow::
|
12
|
-
reader = Arrow::RecordBatchStreamReader.new(
|
11
|
+
Arrow::BufferInputStream.open(buffer) do |input|
|
12
|
+
reader = Arrow::RecordBatchStreamReader.new(input)
|
13
13
|
input_schema = reader.schema
|
14
14
|
reader.each do |record_batch|
|
15
15
|
yield input_schema, record_batch
|
@@ -19,61 +19,55 @@ module AthenaUDF
|
|
19
19
|
end
|
20
20
|
|
21
21
|
def read_schema(schema_data)
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
f.flush
|
22
|
+
buffer = Arrow::ResizableBuffer.new(schema_data.bytes.size)
|
23
|
+
Arrow::BufferOutputStream.open(buffer) do |output|
|
24
|
+
output.write(schema_data)
|
26
25
|
|
27
|
-
Arrow::
|
28
|
-
reader = Arrow::RecordBatchStreamReader.new(
|
26
|
+
Arrow::BufferInputStream.open(buffer) do |input|
|
27
|
+
reader = Arrow::RecordBatchStreamReader.new(input)
|
29
28
|
reader.schema
|
30
29
|
end
|
31
30
|
end
|
32
31
|
end
|
33
32
|
|
34
33
|
def get_schema_bytes(schema, record_batch)
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
end
|
34
|
+
buffer = Arrow::ResizableBuffer.new(0)
|
35
|
+
Arrow::BufferOutputStream.open(buffer) do |output|
|
36
|
+
Arrow::RecordBatchStreamWriter.open(output, schema) do |writer|
|
37
|
+
writer.write_record_batch(record_batch)
|
40
38
|
end
|
41
|
-
f.flush
|
42
39
|
|
43
|
-
|
44
|
-
start_index = get_record_batch_index(
|
45
|
-
|
40
|
+
bytes = buffer.data.to_s
|
41
|
+
start_index = get_record_batch_index(bytes)
|
42
|
+
bytes[4..start_index - 5]
|
46
43
|
end
|
47
44
|
end
|
48
45
|
|
49
46
|
def get_record_batch_bytes(schema, record_batch)
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
end
|
47
|
+
buffer = Arrow::ResizableBuffer.new(0)
|
48
|
+
Arrow::BufferOutputStream.open(buffer) do |output|
|
49
|
+
Arrow::RecordBatchStreamWriter.open(output, schema) do |writer|
|
50
|
+
writer.write_record_batch(record_batch)
|
55
51
|
end
|
56
|
-
f.flush
|
57
52
|
|
58
|
-
|
59
|
-
start_index = get_record_batch_index(
|
60
|
-
|
53
|
+
bytes = buffer.data.to_s
|
54
|
+
start_index = get_record_batch_index(bytes)
|
55
|
+
bytes[start_index..]
|
61
56
|
end
|
62
57
|
end
|
63
58
|
|
64
|
-
def get_record_batch_index(
|
65
|
-
size =
|
59
|
+
def get_record_batch_index(bytes)
|
60
|
+
size = bytes.size
|
66
61
|
found_count = 0
|
67
62
|
start_index = 0
|
68
63
|
0.upto(size - 4).each do |i|
|
69
|
-
has_ffff =
|
70
|
-
|
71
|
-
end
|
64
|
+
has_ffff = bytes.slice(i, 4) == "\xFF\xFF\xFF\xFF".b
|
65
|
+
|
72
66
|
found_count += 1 if has_ffff
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
67
|
+
next unless found_count == 2
|
68
|
+
|
69
|
+
start_index = i + 4
|
70
|
+
break
|
77
71
|
end
|
78
72
|
|
79
73
|
start_index
|
data/lib/athena-udf/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: athena-udf
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.1
|
4
|
+
version: 0.1.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Daisuke Taniwaki
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-
|
11
|
+
date: 2024-09-03 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: base64
|