athena-udf 0.1.3 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +5 -0
- data/Dockerfile.dev +32 -0
- data/Dockerfile.example +29 -12
- data/Gemfile +1 -0
- data/Gemfile.lock +35 -33
- data/README.md +10 -1
- data/lib/athena-udf/base_udf.rb +20 -19
- data/lib/athena-udf/utils.rb +8 -22
- data/lib/athena-udf/version.rb +1 -1
- data/scripts/benchmark.rb +105 -0
- metadata +14 -6
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 9e5ff4cb6e2f166491b1bb18cc2be70b9de602852ff9535eb7b888de86d46569
|
4
|
+
data.tar.gz: df66a5f63ea2ac6a036d4f47befd0dc55ae0f8487de80a8d7d1da8d38606307d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 18e43fce15698f0fc1dd0160cb0098887cf78085ca13774e72819cc89369b3fd466b6bf27a216e1ef1ff863caa412881f0fab4ba5df9af62b6cc0ddfa77f2e85
|
7
|
+
data.tar.gz: eba9d3b48c2adabe2c02c6a22686e0f8b7a4ee35f535738d9a72a7384948765c14a4d978e3061b5d43e75610bab7df8580dd8cbcb9a270bd025a7bb8f0392d5a
|
data/.rubocop.yml
CHANGED
@@ -24,6 +24,7 @@ Metrics/AbcSize:
|
|
24
24
|
Metrics/BlockLength:
|
25
25
|
Exclude:
|
26
26
|
- "spec/**/*"
|
27
|
+
- "scripts/benchmark.rb"
|
27
28
|
|
28
29
|
Style/TrailingCommaInArguments:
|
29
30
|
EnforcedStyleForMultiline: comma
|
@@ -37,3 +38,7 @@ Style/TrailingCommaInHashLiteral:
|
|
37
38
|
Style/ClassVars:
|
38
39
|
Enabled: false
|
39
40
|
|
41
|
+
Naming/VariableNumber:
|
42
|
+
Exclude:
|
43
|
+
- "scripts/benchmark.rb"
|
44
|
+
|
data/Dockerfile.dev
ADDED
@@ -0,0 +1,32 @@
|
|
1
|
+
FROM ruby:3.2
|
2
|
+
|
3
|
+
# General packages
|
4
|
+
RUN apt-get update -y \
|
5
|
+
&& apt-get install -y \
|
6
|
+
build-essential \
|
7
|
+
ca-certificates \
|
8
|
+
lsb-release \
|
9
|
+
wget \
|
10
|
+
vim \
|
11
|
+
&& apt-get clean
|
12
|
+
|
13
|
+
# Apache Arrow
|
14
|
+
RUN apt-get update -y \
|
15
|
+
&& wget https://apache.jfrog.io/artifactory/arrow/$(lsb_release --id --short | tr 'A-Z' 'a-z')/apache-arrow-apt-source-latest-$(lsb_release --codename --short).deb \
|
16
|
+
&& apt install -y ./apache-arrow-apt-source-latest-$(lsb_release --codename --short).deb \
|
17
|
+
&& apt-get update -y \
|
18
|
+
&& apt-get install -y \
|
19
|
+
libarrow-dev \
|
20
|
+
libarrow-glib-dev \
|
21
|
+
&& apt-get clean
|
22
|
+
|
23
|
+
# Update bundler
|
24
|
+
RUN gem update bundler
|
25
|
+
|
26
|
+
WORKDIR /src
|
27
|
+
|
28
|
+
COPY Gemfile Gemfile.lock athena-udf.gemspec /src
|
29
|
+
COPY lib/athena-udf/version.rb /src/lib/athena-udf/version.rb
|
30
|
+
RUN bundle install
|
31
|
+
|
32
|
+
CMD ["/bin/bash"]
|
data/Dockerfile.example
CHANGED
@@ -1,20 +1,36 @@
|
|
1
|
-
|
1
|
+
# Can NOT install apache-arrow on the amazonlinux:2023-minimal image,
|
2
|
+
# so install the ruby directly on amazonlinux:2023.
|
3
|
+
FROM public.ecr.aws/amazonlinux/amazonlinux:2023 AS builder
|
4
|
+
|
5
|
+
# Apache Arrow
|
6
|
+
RUN dnf upgrade -y --releasever=latest \
|
7
|
+
&& dnf install -y https://apache.jfrog.io/artifactory/arrow/amazon-linux/$(cut -d: -f6 /etc/system-release-cpe)/apache-arrow-release-latest.rpm \
|
8
|
+
&& dnf install -y arrow-devel arrow-glib-devel arrow-dataset-devel arrow-dataset-glib-devel
|
9
|
+
|
10
|
+
############
|
11
|
+
|
12
|
+
FROM public.ecr.aws/lambda/ruby:3.3
|
13
|
+
|
14
|
+
COPY --from=builder /usr/include/parquet/arrow/ /usr/include/parquet/arrow/
|
15
|
+
COPY --from=builder /usr/include/arrow* /usr/include/
|
16
|
+
COPY --from=builder /usr/share/gdb/auto-load/usr/lib64/libarrow.so* /usr/share/gdb/auto-load/usr/lib64/
|
17
|
+
COPY --from=builder /usr/share/licenses/arrow* /usr/share/licenses/
|
18
|
+
COPY --from=builder /usr/share/doc/arrow* /usr/share/doc/
|
19
|
+
COPY --from=builder /usr/share/arrow* /usr/share/
|
20
|
+
COPY --from=builder /usr/share/vala/vapi/arrow* /usr/share/vala/vapi/
|
21
|
+
COPY --from=builder /usr/lib64/libarrow* /usr/lib64/
|
22
|
+
COPY --from=builder /usr/lib64/pkgconfig/arrow* /usr/lib64/pkgconfig/
|
23
|
+
COPY --from=builder /usr/lib64/libarrow* /usr/lib64/libarrow/
|
24
|
+
COPY --from=builder /usr/lib64/cmake/Arrow/ /usr/lib64/cmake/Arrow/
|
2
25
|
|
3
26
|
# General packages
|
4
|
-
RUN
|
5
|
-
&&
|
6
|
-
amazon-linux-extras \
|
27
|
+
RUN dnf upgrade -y --releasever=latest \
|
28
|
+
&& dnf install -y \
|
7
29
|
gcc-c++ \
|
30
|
+
glib2-devel \
|
8
31
|
make \
|
9
32
|
git \
|
10
|
-
&&
|
11
|
-
&& yum clean all
|
12
|
-
|
13
|
-
# Apache Arrow
|
14
|
-
RUN yum update -y \
|
15
|
-
&& yum install -y https://apache.jfrog.io/artifactory/arrow/amazon-linux/2/apache-arrow-release-latest.rpm \
|
16
|
-
&& yum install -y --enablerepo=epel arrow-devel arrow-glib-devel arrow-dataset-devel arrow-dataset-glib-devel \
|
17
|
-
&& yum clean all
|
33
|
+
&& dnf clean all
|
18
34
|
|
19
35
|
# Update bundler
|
20
36
|
RUN gem update bundler
|
@@ -29,3 +45,4 @@ RUN bundle config set --local without development \
|
|
29
45
|
COPY . ${LAMBDA_TASK_ROOT}
|
30
46
|
|
31
47
|
CMD ["example.SimpleVarcharUDF.lambda_handler"]
|
48
|
+
|
data/Gemfile
CHANGED
data/Gemfile.lock
CHANGED
@@ -1,39 +1,41 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
athena-udf (0.
|
4
|
+
athena-udf (0.2.0)
|
5
5
|
base64
|
6
6
|
csv
|
7
|
-
red-arrow (
|
7
|
+
red-arrow (< 20.0.0)
|
8
8
|
|
9
9
|
GEM
|
10
10
|
remote: https://rubygems.org/
|
11
11
|
specs:
|
12
12
|
ast (2.4.2)
|
13
13
|
base64 (0.2.0)
|
14
|
-
|
15
|
-
|
16
|
-
|
14
|
+
benchmark (0.3.0)
|
15
|
+
bigdecimal (3.1.9)
|
16
|
+
csv (3.3.2)
|
17
|
+
diff-lcs (1.6.0)
|
17
18
|
docile (1.4.1)
|
18
19
|
extpp (0.1.1)
|
19
|
-
fiddle (1.1.
|
20
|
-
gem-release (2.2.
|
21
|
-
gio2 (4.2.
|
20
|
+
fiddle (1.1.6)
|
21
|
+
gem-release (2.2.4)
|
22
|
+
gio2 (4.2.7)
|
22
23
|
fiddle
|
23
|
-
gobject-introspection (= 4.2.
|
24
|
-
glib2 (4.2.
|
24
|
+
gobject-introspection (= 4.2.7)
|
25
|
+
glib2 (4.2.7)
|
25
26
|
native-package-installer (>= 1.0.3)
|
26
27
|
pkg-config (>= 1.3.5)
|
27
|
-
gobject-introspection (4.2.
|
28
|
-
glib2 (= 4.2.
|
29
|
-
json (2.
|
30
|
-
language_server-protocol (3.17.0.
|
28
|
+
gobject-introspection (4.2.7)
|
29
|
+
glib2 (= 4.2.7)
|
30
|
+
json (2.10.2)
|
31
|
+
language_server-protocol (3.17.0.4)
|
32
|
+
lint_roller (1.1.0)
|
31
33
|
native-package-installer (1.1.9)
|
32
34
|
parallel (1.26.3)
|
33
|
-
parser (3.3.
|
35
|
+
parser (3.3.7.1)
|
34
36
|
ast (~> 2.4.1)
|
35
37
|
racc
|
36
|
-
pkg-config (1.
|
38
|
+
pkg-config (1.6.0)
|
37
39
|
racc (1.8.1)
|
38
40
|
rainbow (3.1.1)
|
39
41
|
rake (13.2.1)
|
@@ -43,45 +45,44 @@ GEM
|
|
43
45
|
gio2 (>= 3.5.0)
|
44
46
|
native-package-installer
|
45
47
|
pkg-config
|
46
|
-
regexp_parser (2.
|
47
|
-
rexml (3.3.5)
|
48
|
-
strscan
|
48
|
+
regexp_parser (2.10.0)
|
49
49
|
rspec (3.13.0)
|
50
50
|
rspec-core (~> 3.13.0)
|
51
51
|
rspec-expectations (~> 3.13.0)
|
52
52
|
rspec-mocks (~> 3.13.0)
|
53
|
-
rspec-core (3.13.
|
53
|
+
rspec-core (3.13.3)
|
54
54
|
rspec-support (~> 3.13.0)
|
55
|
-
rspec-expectations (3.13.
|
55
|
+
rspec-expectations (3.13.3)
|
56
56
|
diff-lcs (>= 1.2.0, < 2.0)
|
57
57
|
rspec-support (~> 3.13.0)
|
58
|
-
rspec-mocks (3.13.
|
58
|
+
rspec-mocks (3.13.2)
|
59
59
|
diff-lcs (>= 1.2.0, < 2.0)
|
60
60
|
rspec-support (~> 3.13.0)
|
61
|
-
rspec-support (3.13.
|
62
|
-
rubocop (1.
|
61
|
+
rspec-support (3.13.2)
|
62
|
+
rubocop (1.74.0)
|
63
63
|
json (~> 2.3)
|
64
|
-
language_server-protocol (
|
64
|
+
language_server-protocol (~> 3.17.0.2)
|
65
|
+
lint_roller (~> 1.1.0)
|
65
66
|
parallel (~> 1.10)
|
66
67
|
parser (>= 3.3.0.2)
|
67
68
|
rainbow (>= 2.2.2, < 4.0)
|
68
|
-
regexp_parser (>= 2.
|
69
|
-
|
70
|
-
rubocop-ast (>= 1.31.1, < 2.0)
|
69
|
+
regexp_parser (>= 2.9.3, < 3.0)
|
70
|
+
rubocop-ast (>= 1.38.0, < 2.0)
|
71
71
|
ruby-progressbar (~> 1.7)
|
72
|
-
unicode-display_width (>= 2.4.0, <
|
73
|
-
rubocop-ast (1.
|
72
|
+
unicode-display_width (>= 2.4.0, < 4.0)
|
73
|
+
rubocop-ast (1.38.1)
|
74
74
|
parser (>= 3.3.1.0)
|
75
75
|
ruby-progressbar (1.13.0)
|
76
76
|
simplecov (0.22.0)
|
77
77
|
docile (~> 1.1)
|
78
78
|
simplecov-html (~> 0.11)
|
79
79
|
simplecov_json_formatter (~> 0.1)
|
80
|
-
simplecov-html (0.
|
80
|
+
simplecov-html (0.13.1)
|
81
81
|
simplecov-lcov (0.8.0)
|
82
82
|
simplecov_json_formatter (0.1.4)
|
83
|
-
|
84
|
-
|
83
|
+
unicode-display_width (3.1.4)
|
84
|
+
unicode-emoji (~> 4.0, >= 4.0.4)
|
85
|
+
unicode-emoji (4.0.4)
|
85
86
|
|
86
87
|
PLATFORMS
|
87
88
|
arm64-darwin-23
|
@@ -89,6 +90,7 @@ PLATFORMS
|
|
89
90
|
|
90
91
|
DEPENDENCIES
|
91
92
|
athena-udf!
|
93
|
+
benchmark (~> 0.3.0)
|
92
94
|
gem-release (~> 2.2)
|
93
95
|
rake (~> 13.0)
|
94
96
|
rspec (~> 3.0)
|
data/README.md
CHANGED
@@ -31,7 +31,7 @@ Just make a subclass of `AthenaUDF::BaseUDF` and implement a concrete function l
|
|
31
31
|
require "athena-udf"
|
32
32
|
|
33
33
|
class SimpleVarcharUDF < AthenaUDF::BaseUDF
|
34
|
-
def
|
34
|
+
def handle_athena_record(_input_schema, _output_schema, record)
|
35
35
|
[record[0].downcase]
|
36
36
|
end
|
37
37
|
end
|
@@ -83,6 +83,15 @@ $ aws iam attach-role-policy --role-name athena-udf-simple-varchar --policy-arn
|
|
83
83
|
$ aws lambda create-function --function-name athena-udf-simple-varchar --package-type Image --role arn:aws:iam::<ACCOUNT_ID>:role/athena-udf-simple-varchar --code ImageUri=<ACCOUNT_ID>.dkr.ecr.<AWS_REGION>.amazonaws.com/athena-udf-test:latest --publish
|
84
84
|
```
|
85
85
|
|
86
|
+
## Development
|
87
|
+
|
88
|
+
You can use the dev container image, which includes necessary packages, to develop this library.
|
89
|
+
|
90
|
+
```sh
|
91
|
+
$ docker build -t ruby-athena-udf-dev -f Dockerfile.dev .
|
92
|
+
$ docker run -v $PWD:/src -it ruby-athena-udf-dev
|
93
|
+
```
|
94
|
+
|
86
95
|
## Contributing
|
87
96
|
|
88
97
|
Bug reports and pull requests are welcome on GitHub at https://github.com/dtaniwaki/ruby-athena-udf.
|
data/lib/athena-udf/base_udf.rb
CHANGED
@@ -9,33 +9,43 @@ require_relative 'utils'
|
|
9
9
|
|
10
10
|
module AthenaUDF
|
11
11
|
class BaseUDF
|
12
|
-
|
12
|
+
include AthenaUDF::Utils
|
13
13
|
|
14
|
-
|
15
|
-
@@logger.level = Logger.const_get(ENV.fetch('LOG_LEVEL', 'WARN').upcase)
|
14
|
+
attr_reader :logger
|
16
15
|
|
17
|
-
def self.lambda_handler(event:, context:)
|
16
|
+
def self.lambda_handler(event:, context:)
|
17
|
+
instance = new(event:, context:)
|
18
18
|
incoming_type = event['@type']
|
19
19
|
if incoming_type == 'PingRequest'
|
20
|
-
return handle_ping(event)
|
20
|
+
return instance.handle_ping(event)
|
21
21
|
elsif incoming_type == 'UserDefinedFunctionRequest'
|
22
|
-
return handle_udf_request(event)
|
22
|
+
return instance.handle_udf_request(event)
|
23
23
|
end
|
24
24
|
|
25
25
|
raise "Unknown event type #{incoming_type} from Athena"
|
26
26
|
end
|
27
27
|
|
28
|
-
|
28
|
+
# About capabilities: https://github.com/awslabs/aws-athena-query-federation/blob/f52d929a109099a1e7180fa242e26331137ed84c/athena-federation-sdk/src/main/java/com/amazonaws/athena/connector/lambda/handlers/FederationCapabilities.java#L29-L32
|
29
|
+
def self.capabilities
|
30
|
+
1
|
31
|
+
end
|
32
|
+
|
33
|
+
def initialize(event:, context:) # rubocop:disable Lint/UnusedMethodArgument
|
34
|
+
@logger = Logger.new($stdout)
|
35
|
+
@logger.level = Logger.const_get(ENV.fetch('LOG_LEVEL', 'WARN').upcase)
|
36
|
+
end
|
37
|
+
|
38
|
+
def handle_ping(event)
|
29
39
|
{
|
30
40
|
'@type' => 'PingResponse',
|
31
41
|
'catalogName' => 'event',
|
32
42
|
'queryId' => event['queryId'],
|
33
43
|
'sourceType' => 'athena_udf',
|
34
|
-
'capabilities' => capabilities,
|
44
|
+
'capabilities' => self.class.capabilities,
|
35
45
|
}
|
36
46
|
end
|
37
47
|
|
38
|
-
def
|
48
|
+
def handle_udf_request(event)
|
39
49
|
# Cannot find a way to write Arrow::RecordBatch to a buffer directly in Ruby.
|
40
50
|
|
41
51
|
output_schema = read_schema(Base64.decode64(event['outputSchema']['schema']))
|
@@ -66,17 +76,8 @@ module AthenaUDF
|
|
66
76
|
}
|
67
77
|
end
|
68
78
|
|
69
|
-
|
70
|
-
def self.capabilities
|
71
|
-
1
|
72
|
-
end
|
73
|
-
|
74
|
-
def self.handle_athena_record(input_schema, output_schema, records)
|
79
|
+
def handle_athena_record(input_schema, output_schema, records)
|
75
80
|
raise NotImplementedError
|
76
81
|
end
|
77
|
-
|
78
|
-
def self.logger
|
79
|
-
@@logger
|
80
|
-
end
|
81
82
|
end
|
82
83
|
end
|
data/lib/athena-udf/utils.rb
CHANGED
@@ -2,6 +2,9 @@
|
|
2
2
|
|
3
3
|
module AthenaUDF
|
4
4
|
module Utils
|
5
|
+
SEPARATOR = "\xFF\xFF\xFF\xFF".b
|
6
|
+
SEPARATOR_SIZE = SEPARATOR.bytesize
|
7
|
+
|
5
8
|
def read_record_batches(schema_data, record_batch_data)
|
6
9
|
buffer = Arrow::ResizableBuffer.new(schema_data.bytes.size + record_batch_data.bytes.size)
|
7
10
|
Arrow::BufferOutputStream.open(buffer) do |output|
|
@@ -30,16 +33,16 @@ module AthenaUDF
|
|
30
33
|
end
|
31
34
|
end
|
32
35
|
|
33
|
-
def get_schema_bytes(schema
|
36
|
+
def get_schema_bytes(schema)
|
34
37
|
buffer = Arrow::ResizableBuffer.new(0)
|
35
38
|
Arrow::BufferOutputStream.open(buffer) do |output|
|
36
39
|
Arrow::RecordBatchStreamWriter.open(output, schema) do |writer|
|
37
|
-
|
40
|
+
# noop
|
38
41
|
end
|
39
42
|
|
40
43
|
bytes = buffer.data.to_s
|
41
|
-
|
42
|
-
bytes[
|
44
|
+
last_index = bytes.index(SEPARATOR, SEPARATOR_SIZE)
|
45
|
+
bytes[SEPARATOR_SIZE...last_index]
|
43
46
|
end
|
44
47
|
end
|
45
48
|
|
@@ -51,26 +54,9 @@ module AthenaUDF
|
|
51
54
|
end
|
52
55
|
|
53
56
|
bytes = buffer.data.to_s
|
54
|
-
start_index =
|
57
|
+
start_index = bytes.index(SEPARATOR, SEPARATOR_SIZE) + SEPARATOR_SIZE
|
55
58
|
bytes[start_index..]
|
56
59
|
end
|
57
60
|
end
|
58
|
-
|
59
|
-
def get_record_batch_index(bytes)
|
60
|
-
size = bytes.size
|
61
|
-
found_count = 0
|
62
|
-
start_index = 0
|
63
|
-
0.upto(size - 4).each do |i|
|
64
|
-
has_ffff = bytes.slice(i, 4) == "\xFF\xFF\xFF\xFF".b
|
65
|
-
|
66
|
-
found_count += 1 if has_ffff
|
67
|
-
next unless found_count == 2
|
68
|
-
|
69
|
-
start_index = i + 4
|
70
|
-
break
|
71
|
-
end
|
72
|
-
|
73
|
-
start_index
|
74
|
-
end
|
75
61
|
end
|
76
62
|
end
|
data/lib/athena-udf/version.rb
CHANGED
data/scripts/benchmark.rb
ADDED
@@ -0,0 +1,105 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'benchmark'
|
4
|
+
require 'athena_udf'
|
5
|
+
|
6
|
+
Benchmark.bm 20 do |r|
|
7
|
+
include AthenaUDF::Utils
|
8
|
+
|
9
|
+
instance = Class.new(AthenaUDF::BaseUDF) do
|
10
|
+
def handle_athena_record(_input_schema, _output_schema, record)
|
11
|
+
[record[0]]
|
12
|
+
end
|
13
|
+
end.new(event: {}, context: {})
|
14
|
+
|
15
|
+
input_schema_1 = Arrow::Schema.new("0": :string)
|
16
|
+
input_schema_bytes_1 = get_schema_bytes(input_schema_1)
|
17
|
+
input_schema_100 = Arrow::Schema.new(0.upto(100).map { |n| [n.to_s, :string] }.to_h)
|
18
|
+
input_schema_bytes_100 = get_schema_bytes(input_schema_100)
|
19
|
+
output_schema = Arrow::Schema.new("0": :string)
|
20
|
+
output_schema_bytes = get_schema_bytes(output_schema)
|
21
|
+
|
22
|
+
input_table1_1 = Arrow::Table.new(input_schema_1, [['FooBar']])
|
23
|
+
input_records_bytes_1_1 = get_record_batch_bytes(input_schema_1, input_table1_1.each_record_batch.first)
|
24
|
+
event_1_1 = {
|
25
|
+
'@type' => 'UserDefinedFunctionRequest',
|
26
|
+
'inputRecords' => {
|
27
|
+
'schema' => Base64.strict_encode64(input_schema_bytes_1),
|
28
|
+
'records' => Base64.strict_encode64(input_records_bytes_1_1),
|
29
|
+
},
|
30
|
+
'methodName' => 'lower',
|
31
|
+
'outputSchema' => {
|
32
|
+
'schema' => Base64.strict_encode64(output_schema_bytes),
|
33
|
+
},
|
34
|
+
'functionType' => 'SCALAR',
|
35
|
+
}
|
36
|
+
|
37
|
+
r.report '1 record 1 column' do
|
38
|
+
100.times do
|
39
|
+
instance.handle_udf_request(event_1_1)
|
40
|
+
end
|
41
|
+
end
|
42
|
+
|
43
|
+
input_table100_1 = Arrow::Table.new(input_schema_1, [['FooBar']] * 100)
|
44
|
+
input_records_bytes_100_1 = get_record_batch_bytes(input_schema_1, input_table100_1.each_record_batch.first)
|
45
|
+
event_100 = {
|
46
|
+
'@type' => 'UserDefinedFunctionRequest',
|
47
|
+
'inputRecords' => {
|
48
|
+
'schema' => Base64.strict_encode64(input_schema_bytes_1),
|
49
|
+
'records' => Base64.strict_encode64(input_records_bytes_100_1),
|
50
|
+
},
|
51
|
+
'methodName' => 'lower',
|
52
|
+
'outputSchema' => {
|
53
|
+
'schema' => Base64.strict_encode64(output_schema_bytes),
|
54
|
+
},
|
55
|
+
'functionType' => 'SCALAR',
|
56
|
+
}
|
57
|
+
|
58
|
+
r.report '100 records 1 column' do
|
59
|
+
100.times do
|
60
|
+
instance.handle_udf_request(event_100)
|
61
|
+
end
|
62
|
+
end
|
63
|
+
|
64
|
+
input_table1_100 = Arrow::Table.new(input_schema_100, [['FooBar'] * 100])
|
65
|
+
input_records_bytes_1_100 = get_record_batch_bytes(input_schema_100, input_table1_100.each_record_batch.first)
|
66
|
+
event_1_100 = {
|
67
|
+
'@type' => 'UserDefinedFunctionRequest',
|
68
|
+
'inputRecords' => {
|
69
|
+
'schema' => Base64.strict_encode64(input_schema_bytes_100),
|
70
|
+
'records' => Base64.strict_encode64(input_records_bytes_1_100),
|
71
|
+
},
|
72
|
+
'methodName' => 'lower',
|
73
|
+
'outputSchema' => {
|
74
|
+
'schema' => Base64.strict_encode64(output_schema_bytes),
|
75
|
+
},
|
76
|
+
'functionType' => 'SCALAR',
|
77
|
+
}
|
78
|
+
|
79
|
+
r.report '1 record 100 column' do
|
80
|
+
100.times do
|
81
|
+
instance.handle_udf_request(event_1_100)
|
82
|
+
end
|
83
|
+
end
|
84
|
+
|
85
|
+
input_table_100_100 = Arrow::Table.new(input_schema_100, [['FooBar'] * 100] * 100)
|
86
|
+
input_records_bytes_100_100 = get_record_batch_bytes(input_schema_100, input_table_100_100.each_record_batch.first)
|
87
|
+
event_100_100 = {
|
88
|
+
'@type' => 'UserDefinedFunctionRequest',
|
89
|
+
'inputRecords' => {
|
90
|
+
'schema' => Base64.strict_encode64(input_schema_bytes_100),
|
91
|
+
'records' => Base64.strict_encode64(input_records_bytes_100_100),
|
92
|
+
},
|
93
|
+
'methodName' => 'lower',
|
94
|
+
'outputSchema' => {
|
95
|
+
'schema' => Base64.strict_encode64(output_schema_bytes),
|
96
|
+
},
|
97
|
+
'functionType' => 'SCALAR',
|
98
|
+
}
|
99
|
+
|
100
|
+
r.report '100 record 100 column' do
|
101
|
+
100.times do
|
102
|
+
instance.handle_udf_request(event_100_100)
|
103
|
+
end
|
104
|
+
end
|
105
|
+
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: athena-udf
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.3
|
4
|
+
version: 0.2.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Daisuke Taniwaki
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2025-03-13 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: base64
|
@@ -42,16 +42,22 @@ dependencies:
|
|
42
42
|
name: red-arrow
|
43
43
|
requirement: !ruby/object:Gem::Requirement
|
44
44
|
requirements:
|
45
|
-
- - "
|
45
|
+
- - ">="
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: 12.0.0
|
48
|
+
- - "<"
|
46
49
|
- !ruby/object:Gem::Version
|
47
|
-
version:
|
50
|
+
version: 20.0.0
|
48
51
|
type: :runtime
|
49
52
|
prerelease: false
|
50
53
|
version_requirements: !ruby/object:Gem::Requirement
|
51
54
|
requirements:
|
52
|
-
- - "
|
55
|
+
- - ">="
|
56
|
+
- !ruby/object:Gem::Version
|
57
|
+
version: 12.0.0
|
58
|
+
- - "<"
|
53
59
|
- !ruby/object:Gem::Version
|
54
|
-
version:
|
60
|
+
version: 20.0.0
|
55
61
|
description: ''
|
56
62
|
email:
|
57
63
|
- daisuketaniwaki@gmail.com
|
@@ -62,6 +68,7 @@ files:
|
|
62
68
|
- ".dockerignore"
|
63
69
|
- ".rspec"
|
64
70
|
- ".rubocop.yml"
|
71
|
+
- Dockerfile.dev
|
65
72
|
- Dockerfile.example
|
66
73
|
- Gemfile
|
67
74
|
- Gemfile.lock
|
@@ -73,6 +80,7 @@ files:
|
|
73
80
|
- lib/athena-udf/utils.rb
|
74
81
|
- lib/athena-udf/version.rb
|
75
82
|
- lib/athena_udf.rb
|
83
|
+
- scripts/benchmark.rb
|
76
84
|
homepage: https://github.com/dtaniwaki/ruby-athena-udf
|
77
85
|
licenses:
|
78
86
|
- MIT
|