activerecord_athena 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/LICENSE +21 -0
- data/README.md +104 -0
- data/lib/active_record/connection_adapters/athena/database_tasks.rb +47 -0
- data/lib/active_record/connection_adapters/athena/schema_dumper.rb +78 -0
- data/lib/active_record/connection_adapters/athena/schema_statements.rb +188 -0
- data/lib/active_record/connection_adapters/athena_adapter.rb +352 -0
- data/lib/activerecord_athena/version.rb +3 -0
- data/lib/activerecord_athena.rb +7 -0
- metadata +150 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: a035d8c2fbb40cec7a7e2b7332e1de34b7dff1adbdd737477425f4ef6d535d2f
|
4
|
+
data.tar.gz: ddeb554be40247d1d9602c2ca40b204fe2b0d0e623177f910b0fd994b416623e
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: a02fd84206cebf99e0d54d26b70db80366da546e81a9c2571a550412f327a905542dc4e829b0d27af4d18fdb9a318e249e991b5a1a78b1fc6577da494026dc72
|
7
|
+
data.tar.gz: 66716bb6504915749ca13ee869ff9fa1e49a5cccd1b3f0ab6a5afe0ddeab97afa3562a1928411d6508de3abaf0ed1ce53a70a61b85044ab10230ecb2b79fdcb0
|
data/LICENSE
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
MIT License
|
2
|
+
|
3
|
+
Copyright (c) 2025 Jeremy Hinkle
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
13
|
+
copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
21
|
+
SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,104 @@
|
|
1
|
+
# ActiveRecord Athena Adapter
|
2
|
+
|
3
|
+
An ActiveRecord adapter for AWS Athena that enables Rails applications to connect to and query AWS Athena.
|
4
|
+
|
5
|
+
## Installation
|
6
|
+
|
7
|
+
Add this line to your application's Gemfile:
|
8
|
+
|
9
|
+
```ruby
|
10
|
+
gem 'activerecord_athena'
|
11
|
+
```
|
12
|
+
|
13
|
+
And then execute:
|
14
|
+
|
15
|
+
$ bundle install
|
16
|
+
|
17
|
+
Or install it yourself as:
|
18
|
+
|
19
|
+
$ gem install activerecord_athena
|
20
|
+
|
21
|
+
## Usage
|
22
|
+
|
23
|
+
### Configuration
|
24
|
+
|
25
|
+
Add the following to your `database.yml`:
|
26
|
+
|
27
|
+
```yaml
|
28
|
+
development:
|
29
|
+
adapter: athena
|
30
|
+
database: your_athena_database
|
31
|
+
s3_output_location: s3://your-bucket/query-results/
|
32
|
+
work_group: primary
|
33
|
+
connection_options:
|
34
|
+
aws_config:
|
35
|
+
region: us-east-1
|
36
|
+
access_key_id: <%= ENV['AWS_ACCESS_KEY_ID'] %>
|
37
|
+
secret_access_key: <%= ENV['AWS_SECRET_ACCESS_KEY'] %>
|
38
|
+
```
|
39
|
+
|
40
|
+
### Basic Usage
|
41
|
+
|
42
|
+
```ruby
|
43
|
+
# Define a model
|
44
|
+
class LogEntry < ActiveRecord::Base
|
45
|
+
self.table_name = "log_entries"
|
46
|
+
end
|
47
|
+
|
48
|
+
# Query data
|
49
|
+
LogEntry.where("timestamp > ?", 1.day.ago).limit(100)
|
50
|
+
|
51
|
+
# Raw SQL queries
|
52
|
+
ActiveRecord::Base.connection.execute("SELECT * FROM log_entries LIMIT 10")
|
53
|
+
```
|
54
|
+
|
55
|
+
### Limitations
|
56
|
+
|
57
|
+
Due to the nature of AWS Athena, this adapter has several limitations:
|
58
|
+
|
59
|
+
- **No migrations**: Athena doesn't support traditional CREATE TABLE statements. Tables are typically created as external tables pointing to S3 data.
|
60
|
+
- **No primary keys**: Athena doesn't support primary key constraints.
|
61
|
+
- **No foreign keys**: Athena doesn't support foreign key constraints.
|
62
|
+
- **No indexes**: Athena doesn't support traditional indexes.
|
63
|
+
- **Limited write operations**: Traditional UPDATE/DELETE operations require Iceberg or Delta Lake table formats.
|
64
|
+
|
65
|
+
### Supported Operations
|
66
|
+
|
67
|
+
- `SELECT` queries with `WHERE`, `ORDER BY`, `LIMIT`, etc.
|
68
|
+
- `SHOW TABLES` - list all tables in the database
|
69
|
+
- `DESCRIBE table` - get table schema information
|
70
|
+
- `DROP TABLE` - remove external tables
|
71
|
+
- `INSERT` statements - add new data to tables
|
72
|
+
- `UPDATE` statements - limited support (requires Iceberg/Delta Lake tables)
|
73
|
+
- `DELETE` statements - limited support (requires Iceberg/Delta Lake tables)
|
74
|
+
|
75
|
+
### AWS Configuration
|
76
|
+
|
77
|
+
The adapter requires proper AWS credentials and permissions. You can configure these through:
|
78
|
+
|
79
|
+
1. Environment variables (`AWS_ACCESS_KEY_ID`, `AWS_SECRET_ACCESS_KEY`)
|
80
|
+
2. IAM roles (recommended for EC2/ECS deployments)
|
81
|
+
3. AWS credentials file
|
82
|
+
4. Direct configuration in `database.yml`
|
83
|
+
|
84
|
+
Required permissions:
|
85
|
+
- `athena:StartQueryExecution`
|
86
|
+
- `athena:GetQueryExecution`
|
87
|
+
- `athena:GetQueryResults`
|
88
|
+
- `s3:GetObject`
|
89
|
+
- `s3:ListBucket`
|
90
|
+
- `s3:PutObject` (for query results)
|
91
|
+
|
92
|
+
## Development
|
93
|
+
|
94
|
+
After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests.
|
95
|
+
|
96
|
+
To install this gem onto your local machine, run `bundle exec rake install`.
|
97
|
+
|
98
|
+
## Contributing
|
99
|
+
|
100
|
+
Bug reports and pull requests are welcome on GitHub at https://github.com/jchinkle/activerecord_athena.
|
101
|
+
|
102
|
+
## License
|
103
|
+
|
104
|
+
The gem is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT).
|
@@ -0,0 +1,47 @@
|
|
1
|
+
module ActiveRecord
  module ConnectionAdapters
    module Athena
      # Database-task handler for the Athena adapter.
      #
      # None of the tasks talk to AWS: create, drop, purge and structure_load
      # only print a notice, and structure_dump writes a one-line placeholder
      # file.
      #
      # Every task is offered in two forms:
      #
      # * class methods that take the configuration explicitly, and
      # * instance methods on an object built with +new(config)+, which simply
      #   delegate to the class methods.
      #
      # NOTE(review): ActiveRecord::Tasks::DatabaseTasks is understood to build
      # the registered handler with +new(config)+ and call the instance form —
      # confirm against the Active Record versions this gem supports.
      class DatabaseTasks
        class << self
          # Prints a notice; Athena databases are created outside this adapter.
          def create(config)
            puts "Athena databases should be created through AWS console or CLI"
          end

          # Prints a notice; Athena databases are dropped outside this adapter.
          def drop(config)
            puts "Athena databases should be dropped through AWS console or CLI"
          end

          # Prints a notice; there is no purge operation.
          def purge(config)
            puts "Athena doesn't support purge operations"
          end

          # @return [String] always "UTF-8"
          def charset(config)
            "UTF-8"
          end

          # @return [nil] no collation is reported
          def collation(config)
            nil
          end

          # Writes a placeholder structure file to +filename+.
          #
          # @return [Integer] number of bytes written (result of File.write)
          def structure_dump(config, filename)
            File.write(filename, "-- Athena structure dump placeholder\n")
          end

          # Prints a notice; loading a structure file is not implemented.
          def structure_load(config, filename)
            puts "Structure loading for Athena needs to be implemented"
          end
        end

        # @param config [Object, nil] database configuration; it is only passed
        #   through to the class-level task methods.
        # Extra positional arguments are accepted and ignored.
        def initialize(config = nil, *)
          @config = config
        end

        def create
          self.class.create(@config)
        end

        def drop
          self.class.drop(@config)
        end

        def purge
          self.class.purge(@config)
        end

        def charset
          self.class.charset(@config)
        end

        def collation
          self.class.collation(@config)
        end

        # @param filename [String] path of the structure file to write
        # @param _extra_flags [Object] accepted for call compatibility, unused
        def structure_dump(filename, _extra_flags = nil)
          self.class.structure_dump(@config, filename)
        end

        # @param filename [String] path of the structure file to load
        # @param _extra_flags [Object] accepted for call compatibility, unused
        def structure_load(filename, _extra_flags = nil)
          self.class.structure_load(@config, filename)
        end
      end
    end
  end
end
|
45
|
+
|
46
|
+
# Register the database tasks
|
47
|
+
# The handler is passed by name (a String) and keyed by the /athena/ pattern;
# presumably the pattern is matched against the configured adapter name — confirm.
ActiveRecord::Tasks::DatabaseTasks.register_task(/athena/, "ActiveRecord::ConnectionAdapters::Athena::DatabaseTasks")
|
@@ -0,0 +1,78 @@
|
|
1
|
+
module ActiveRecord
  module ConnectionAdapters
    module Athena
      # Schema dumper for Athena connections.
      #
      # Emits one +create_table+ block per table, flagged with
      # options: "EXTERNAL" and id: false, and never emits index or
      # foreign-key statements.
      class SchemaDumper < ConnectionAdapters::SchemaDumper
        private

        # Writes the generated-file banner and opens the Schema.define block.
        #
        # @param stream [IO] destination of the dump
        def header(stream)
          stream.puts <<~HEADER
            # This file is auto-generated from the current state of the database. Instead
            # of editing this file, please use the migrations feature of Active Record to
            # incrementally modify your database, and then regenerate this schema definition.
            #
            # Note that this schema.rb definition is the authoritative source for your
            # database schema. If you need to create the application database on another
            # system, you should be using db:schema:load, not running all the migrations
            # from scratch. The latter is a flawed and unsustainable approach (the more
            # migrations you'll amass, the slower it'll run and the greater likelihood for
            # issues).
            #
            # It's strongly recommended that you check this file into your version control system.

            ActiveRecord::Schema.define(version: #{ActiveRecord::Migrator.current_version}) do
          HEADER
        end

        # Closes the Schema.define block opened by #header.
        def trailer(stream)
          stream.puts "end"
        end

        # Dumps a single table definition.
        #
        # The definition is first rendered into a scratch buffer so that a
        # failure half-way through leaves only an explanatory comment in
        # +stream+ rather than a truncated create_table block.
        #
        # @param table [String] table name
        # @param stream [IO] destination of the dump
        def table(table, stream)
          columns = @connection.columns(table)
          begin
            tbl = StringIO.new

            # Athena tables are typically external tables and have no primary
            # key, so the block is marked id: false and every column is dumped
            # as-is, including one that happens to be called "id".
            tbl.print " create_table #{remove_prefix_and_suffix(table).inspect}"
            tbl.print ", id: false"
            tbl.print ", force: :cascade"
            tbl.print ", options: \"EXTERNAL\""
            tbl.puts " do |t|"

            columns.each do |column|
              unless @connection.valid_type?(column.type)
                raise StandardError, "Unknown type '#{column.sql_type}' for column '#{column.name}'"
              end

              type, colspec = column_spec(column)
              tbl.print " t.#{type} #{column.name.inspect}"
              tbl.print ", #{format_colspec(colspec)}" if colspec.present?
              tbl.puts
            end

            tbl.puts " end"
            tbl.puts

            indexes(table, tbl)

            tbl.rewind
            stream.print tbl.read
          rescue => e
            stream.puts "# Could not dump table #{table.inspect} because of following #{e.class}"
            stream.puts "# #{e.message}"
            stream.puts
          end
        end

        # Intentionally empty: Athena doesn't support traditional indexes.
        def indexes(table, stream)
        end

        # Intentionally empty: Athena doesn't support foreign keys.
        def foreign_keys(table, stream)
        end
      end
    end
  end
end
|
@@ -0,0 +1,188 @@
|
|
1
|
+
module ActiveRecord
  module ConnectionAdapters
    module Athena
      # Schema introspection and DDL entry points for the Athena adapter.
      #
      # The including class must provide +execute(sql)+ returning an object
      # that answers +[:rows]+, where each row answers +[:data]+ with a list
      # of cells and each cell answers +[:var_char_value]+.
      #
      # Only DROP TABLE is forwarded; every other schema-changing method
      # raises NotImplementedError.
      module SchemaStatements
        # Ordered [pattern, type symbol] pairs; the first pattern matching the
        # down-cased SQL type wins. Order matters: "timestamp"/"datetime" must
        # be tested before "time" and "date".
        ATHENA_TYPE_PATTERNS = [
          [/\A(?:string|varchar|char)/, :string],
          [/\A(?:bigint|int|tinyint|smallint)/, :integer],
          [/\A(?:double|float)/, :float],
          [/\Adecimal/, :decimal],
          [/\Aboolean/, :boolean],
          [/\A(?:timestamp|datetime)/, :datetime],
          [/\Adate/, :date],
          [/\Atime/, :time],
          [/\Abinary/, :binary]
        ].freeze

        # @return [Array<String>] first cell of every SHOW TABLES row
        def tables
          result = execute("SHOW TABLES")
          result[:rows].filter_map do |row|
            first_cell = row[:data]&.first
            first_cell && first_cell[:var_char_value]
          end
        end

        # Same list as #tables.
        def data_sources
          tables
        end

        # Builds the SHOW TABLES statement, optionally narrowed to +name+.
        # Single quotes in +name+ are doubled so the value cannot terminate
        # the SQL string literal it is embedded in.
        #
        # @param name [#to_s, nil] table name/pattern
        # @param type [Object] accepted for signature compatibility, unused
        # @return [String]
        def data_source_sql(name = nil, type: nil)
          return "SHOW TABLES" unless name

          "SHOW TABLES LIKE '#{name.to_s.gsub("'", "''")}'"
        end

        def data_source_exists?(name)
          tables.include?(name.to_s)
        end

        def table_exists?(table_name)
          tables.include?(table_name.to_s)
        end

        # Describes +table_name+ and returns one Column per distinct column.
        #
        # Header rows ("col_name", "field_name"), "#" comment rows, blank rows
        # and rows without a type are skipped. A name that shows up a second
        # time is ignored, so a column repeated further down the DESCRIBE
        # output is reported once.
        #
        # @param table_name [#to_s] interpolated unquoted into the statement
        #   (Athena doesn't support quoted table names in DESCRIBE)
        # @return [Array<ActiveRecord::ConnectionAdapters::Column>]
        def columns(table_name)
          result = execute("DESCRIBE #{table_name}")
          seen = {}

          result[:rows].each_with_object([]) do |row, found|
            column_name, sql_type = describe_row_fields(row[:data])
            next if column_name.nil? || seen.key?(column_name)

            seen[column_name] = true
            type_metadata = ActiveRecord::ConnectionAdapters::SqlTypeMetadata.new(
              sql_type: sql_type,
              type: lookup_cast_type_symbol(sql_type)
            )

            found << ConnectionAdapters::Column.new(
              column_name,
              nil, # default value
              type_metadata,
              true # nullable - Athena columns are typically nullable
            )
          end
        end

        # +type+ and +options+ are accepted for signature compatibility only;
        # the check is by name.
        def column_exists?(table_name, column_name, type = nil, **options)
          wanted = column_name.to_s
          columns(table_name).any? { |col| col.name == wanted }
        end

        # @raise [NotImplementedError] always
        def create_table(table_name, **options)
          raise NotImplementedError, "CREATE TABLE is not supported for Athena. Use external table creation through AWS console or CLI."
        end

        # Issues DROP TABLE with the name interpolated unquoted
        # (Athena may not support quoted table names in DROP TABLE).
        def drop_table(table_name, **options)
          execute("DROP TABLE #{table_name}")
        end

        # @raise [NotImplementedError] always
        def rename_table(table_name, new_name)
          raise NotImplementedError, "RENAME TABLE is not supported for Athena"
        end

        # @raise [NotImplementedError] always
        def add_column(table_name, column_name, type, **options)
          raise NotImplementedError, "ADD COLUMN is not supported for Athena"
        end

        # @raise [NotImplementedError] always
        def remove_column(table_name, column_name, type = nil, **options)
          raise NotImplementedError, "REMOVE COLUMN is not supported for Athena"
        end

        # @raise [NotImplementedError] always
        def change_column(table_name, column_name, type, **options)
          raise NotImplementedError, "CHANGE COLUMN is not supported for Athena"
        end

        # @raise [NotImplementedError] always
        def rename_column(table_name, column_name, new_column_name)
          raise NotImplementedError, "RENAME COLUMN is not supported for Athena"
        end

        # @raise [NotImplementedError] always
        def add_index(table_name, column_name, **options)
          raise NotImplementedError, "ADD INDEX is not supported for Athena"
        end

        # @raise [NotImplementedError] always
        def remove_index(table_name, column_name = nil, **options)
          raise NotImplementedError, "REMOVE INDEX is not supported for Athena"
        end

        # Athena doesn't support traditional indexes.
        def indexes(table_name)
          []
        end

        # Athena doesn't support primary keys.
        def primary_key(table_name)
          nil
        end

        # Athena doesn't support foreign keys.
        def foreign_keys(table_name)
          []
        end

        private

        # Extracts [column_name, sql_type] from the cells of one DESCRIBE row,
        # or returns nil when the row does not describe a column.
        #
        # A row made of a single cell is split on tabs first.
        # NOTE(review): Athena appears to return DDL output as one
        # tab-separated cell per row — confirm against a live DESCRIBE result.
        def describe_row_fields(data)
          return if data.nil? || data.empty?

          cells = data.map { |cell| cell[:var_char_value] }
          cells = cells.first.to_s.split("\t") if cells.length == 1
          return if cells.length < 2

          name = cells[0].to_s.strip
          sql_type = cells[1].to_s.strip
          return if name.empty? || sql_type.empty?
          return if name.start_with?("#") || name == "col_name" || name == "field_name"

          [name, sql_type]
        end

        # Maps a SQL type string to an Active Record type symbol by prefix.
        #
        # @return [Symbol] :string when no pattern matches
        def lookup_cast_type_symbol(sql_type)
          normalized = sql_type.to_s.downcase
          _pattern, symbol = ATHENA_TYPE_PATTERNS.find { |pattern, _| pattern.match?(normalized) }
          symbol || :string
        end

        # Builds the cast-type object for a SQL type string, using the same
        # prefix rules as #lookup_cast_type_symbol.
        def lookup_cast_type(sql_type)
          case lookup_cast_type_symbol(sql_type)
          when :integer  then ActiveRecord::Type::Integer.new
          when :float    then ActiveRecord::Type::Float.new
          when :decimal  then ActiveRecord::Type::Decimal.new
          when :boolean  then ActiveRecord::Type::Boolean.new
          when :datetime then ActiveRecord::Type::DateTime.new
          when :date     then ActiveRecord::Type::Date.new
          when :time     then ActiveRecord::Type::Time.new
          when :binary   then ActiveRecord::Type::Binary.new
          else ActiveRecord::Type::String.new
          end
        end
      end
    end
  end
end
|
@@ -0,0 +1,352 @@
|
|
1
|
+
require "active_record/connection_adapters/abstract_adapter"
|
2
|
+
require "aws-sdk-athena"
|
3
|
+
require "aws-sdk-s3"
|
4
|
+
require "active_record/connection_adapters/athena/schema_statements"
|
5
|
+
require "active_record/connection_adapters/athena/database_tasks"
|
6
|
+
|
7
|
+
module ActiveRecord
  module ConnectionAdapters
    # Active Record connection adapter that runs SQL through the AWS Athena API.
    #
    # Every statement is submitted with +start_query_execution+, polled with
    # +get_query_execution+ until it succeeds, fails or is cancelled, and its
    # rows are then read back with +get_query_results+. Bind placeholders are
    # replaced with quoted literals before the SQL is sent. Transactions,
    # savepoints, primary keys, foreign keys and migrations are reported as
    # unsupported.
    class AthenaAdapter < AbstractAdapter
      include Athena::SchemaStatements

      ADAPTER_NAME = "Athena"

      # Seconds to sleep between two status polls of a running query.
      QUERY_POLL_INTERVAL = 0.5

      # Matches either a complete single- or double-quoted SQL token (left
      # untouched during bind substitution) or a bare "?" placeholder.
      BIND_TOKEN = /'(?:[^']|'')*'|"(?:[^"]|"")*"|\?/.freeze

      # @param config [Hash] reads :connection_options, :database,
      #   :s3_output_location and :work_group (defaults to "primary")
      def initialize(config)
        super(config)
        # Nested option keys may arrive as strings (e.g. when loaded from
        # YAML); they are symbolized so the :aws_config lookup and the AWS
        # client options see symbol keys.
        @connection_options = symbolize_config_keys(config[:connection_options] || {})
        @database = config[:database]
        @s3_output_location = config[:s3_output_location]
        @work_group = config[:work_group] || "primary"
      end

      def adapter_name
        ADAPTER_NAME
      end

      # Always true: there is no persistent connection to probe.
      def active?
        true
      end

      # Drops the memoized AWS clients so they are rebuilt on next use.
      # Keyword options are accepted and ignored so callers that pass them
      # do not raise ArgumentError.
      def reconnect!(**_options)
        @athena_client = nil
        @s3_client = nil
      end

      # No-op: Athena doesn't maintain persistent connections.
      def disconnect!
      end

      def supports_migrations?
        false
      end

      def supports_primary_key?
        false
      end

      def supports_bulk_alter?
        false
      end

      def supports_foreign_keys?
        false
      end

      def supports_views?
        true
      end

      def supports_datetime_with_precision?
        true
      end

      def supports_json?
        true
      end

      def supports_statement_cache?
        true
      end

      def supports_lazy_transactions?
        false
      end

      def supports_transactions?
        false
      end

      def supports_savepoints?
        false
      end

      # @return [Hash] Active Record type => Athena column type
      def native_database_types
        {
          primary_key: "string",
          string: { name: "string" },
          text: { name: "string" },
          integer: { name: "bigint" },
          bigint: { name: "bigint" },
          float: { name: "double" },
          decimal: { name: "decimal" },
          datetime: { name: "timestamp" },
          time: { name: "time" },
          date: { name: "date" },
          binary: { name: "binary" },
          boolean: { name: "boolean" },
          json: { name: "string" }
        }
      end

      # Wraps +name+ in double quotes, doubling any embedded double quote so
      # the identifier cannot be terminated early.
      def quote_column_name(name)
        %("#{name.to_s.gsub('"', '""')}")
      end

      # Same quoting rule as #quote_column_name.
      def quote_table_name(name)
        %("#{name.to_s.gsub('"', '""')}")
      end

      def quoted_true
        "true"
      end

      def quoted_false
        "false"
      end

      # Renders +value+ as a SQL literal.
      #
      # Public, so it can be called with an explicit receiver
      # (connection.quote(value)).
      #
      # Time/DateTime are tested before Date because DateTime is a Date
      # subclass and would otherwise be rendered without its time part.
      # BigDecimal is written in plain decimal notation, unquoted.
      #
      # @return [String]
      def quote(value)
        return value.to_s("F") if defined?(::BigDecimal) && value.is_a?(::BigDecimal)

        case value
        when String
          "'#{value.gsub("'", "''")}'"
        when Integer, Float
          value.to_s
        when true
          'true'
        when false
          'false'
        when nil
          'NULL'
        when Time, DateTime
          "'#{value.strftime('%Y-%m-%d %H:%M:%S')}'"
        when Date
          "'#{value.strftime('%Y-%m-%d')}'"
        else
          "'#{value.to_s.gsub("'", "''")}'"
        end
      end

      # Runs +sql+ as-is and returns the raw result hash
      # ({ column_info: ..., rows: ... }).
      def execute(sql, name = nil)
        log(sql, name) do
          execute_query(sql)
        end
      end

      # Runs +sql+ after substituting +binds+ and wraps the rows in an
      # ActiveRecord::Result. A first row equal to the column names is
      # treated as a header and dropped.
      def exec_query(sql, name = "SQL", binds = [], **kwargs)
        log(sql, name) do
          build_result(execute_query(substitute_binds(sql, binds)))
        end
      end

      def select_all(arel, name = nil, binds = [], **kwargs)
        exec_query(to_sql(arel, binds), name, binds)
      end

      # Returns arel.to_sql when +arel+ responds to it, otherwise +arel+
      # itself. +binds+ is unused.
      #
      # Public with an optional +binds+, so connection.to_sql(arel) works
      # with an explicit receiver and a single argument.
      def to_sql(arel, binds = [])
        arel.respond_to?(:to_sql) ? arel.to_sql : arel
      end

      # Executes a modification statement, logging a warning first when it is
      # an UPDATE (likely to fail unless using Iceberg/Delta Lake tables).
      #
      # @return [Integer] always 0; Athena doesn't provide an affected-row count
      def exec_update(sql, name = nil, binds = [])
        log(sql, name) do
          run_modification(
            sql, binds, "UPDATE",
            "UPDATE operations in Athena are limited. Consider using INSERT OVERWRITE or MERGE operations instead."
          )
        end
      end

      # Executes a modification statement, logging a warning first when it is
      # a DELETE (likely to fail unless using Iceberg/Delta Lake tables).
      #
      # @return [Integer] always 0; Athena doesn't provide an affected-row count
      def exec_delete(sql, name = nil, binds = [])
        log(sql, name) do
          run_modification(
            sql, binds, "DELETE",
            "DELETE operations in Athena are limited. Consider using INSERT OVERWRITE with filtered data instead."
          )
        end
      end

      # Executes an INSERT.
      #
      # @return [nil] Athena doesn't support returning generated IDs
      def exec_insert(sql, name = nil, binds = [], pk = nil, sequence_name = nil, returning: nil)
        log(sql, name) do
          execute_query(substitute_binds(sql, binds))
          nil
        end
      end

      # Transaction methods are no-ops: Athena doesn't support transactions.
      def begin_db_transaction
      end

      def commit_db_transaction
      end

      def rollback_db_transaction
      end

      private

      def athena_client
        @athena_client ||= Aws::Athena::Client.new(aws_config)
      end

      def s3_client
        @s3_client ||= Aws::S3::Client.new(aws_config)
      end

      def aws_config
        @connection_options[:aws_config] || {}
      end

      # Returns a copy of +value+ with every Hash key that responds to
      # to_sym converted to a Symbol, recursing through nested Hashes and
      # Arrays.
      def symbolize_config_keys(value)
        case value
        when Hash
          value.each_with_object({}) do |(key, nested), out|
            out[key.respond_to?(:to_sym) ? key.to_sym : key] = symbolize_config_keys(nested)
          end
        when Array
          value.map { |nested| symbolize_config_keys(nested) }
        else
          value
        end
      end

      # Shared body of exec_update / exec_delete.
      def run_modification(sql, binds, verb, warning)
        ActiveRecord::Base.logger&.warn(warning) if sql.match?(/\A\s*#{verb}\b/i)
        execute_query(substitute_binds(sql, binds))
        0
      end

      # Converts the raw result hash into an ActiveRecord::Result.
      def build_result(query_result)
        return ActiveRecord::Result.new([], []) unless query_result[:rows].any?

        columns = query_result[:column_info].map { |col| col[:name] }
        rows = query_result[:rows].map { |row| row[:data].map { |cell| cell[:var_char_value] } }
        # The first row is often the header row in Athena results.
        rows = rows.drop(1) if rows.first == columns

        ActiveRecord::Result.new(columns, rows)
      end

      # Replaces "?" placeholders with quoted bind values, left to right.
      #
      # * A "?" inside a quoted literal or identifier is not a placeholder
      #   and is left alone.
      # * Placeholders beyond the number of binds stay as "?".
      # * With no binds at all, only "LIMIT ?" is rewritten, to the
      #   hard-coded fallback "LIMIT 1000".
      def substitute_binds(sql, binds)
        if binds.empty?
          return sql.include?('?') ? sql.gsub(/LIMIT \?/, 'LIMIT 1000') : sql
        end

        pending = binds.dup
        sql.gsub(BIND_TOKEN) do |token|
          next token if token != '?' || pending.empty?

          bind = pending.shift
          quote(bind.respond_to?(:value) ? bind.value : bind)
        end
      end

      def execute_query(sql)
        query_execution_id = start_query_execution(sql)
        wait_for_query_completion(query_execution_id)
        get_query_results(query_execution_id)
      end

      # Submits +sql+ and returns the query execution id.
      def start_query_execution(sql)
        response = athena_client.start_query_execution(
          query_string: sql,
          query_execution_context: { database: @database },
          result_configuration: { output_location: @s3_output_location },
          work_group: @work_group
        )

        response.query_execution_id
      end

      # Polls until the query reaches a terminal state.
      #
      # NOTE(review): there is no upper bound on the wait; a query that never
      # leaves a non-terminal state blocks the caller indefinitely.
      #
      # @raise [ActiveRecord::StatementInvalid] when the state is FAILED or CANCELLED
      def wait_for_query_completion(query_execution_id)
        loop do
          response = athena_client.get_query_execution(query_execution_id: query_execution_id)
          status = response.query_execution.status

          case status.state
          when "SUCCEEDED"
            break
          when "FAILED", "CANCELLED"
            raise ActiveRecord::StatementInvalid, "Query failed: #{status.state_change_reason}"
          else
            sleep(QUERY_POLL_INTERVAL)
          end
        end
      end

      # Fetches every page of the result set by following +next_token+ and
      # returns the column metadata of the first page with the rows of all
      # pages.
      #
      # @return [Hash] { column_info: Array, rows: Array }
      def get_query_results(query_execution_id)
        column_info = nil
        rows = []
        next_token = nil

        loop do
          request = { query_execution_id: query_execution_id }
          request[:next_token] = next_token if next_token

          response = athena_client.get_query_results(request)
          column_info ||= response.result_set.result_set_metadata.column_info
          rows.concat(response.result_set.rows)

          next_token = response.next_token
          break if next_token.nil? || next_token.empty?
        end

        { column_info: column_info, rows: rows }
      end
    end
  end
end
|
350
|
+
|
351
|
+
# Register the adapter
|
352
|
+
# Arguments: adapter name, adapter class name, and the path to require for it.
# NOTE(review): `ActiveRecord::ConnectionAdapters.register` may not exist on every
# Active Record release the gemspec allows (`>= 6.0`) — confirm the minimum version.
ActiveRecord::ConnectionAdapters.register("athena", "ActiveRecord::ConnectionAdapters::AthenaAdapter", "active_record/connection_adapters/athena_adapter")
|
metadata
ADDED
@@ -0,0 +1,150 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: activerecord_athena
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Jeremy Hinkle
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2025-07-09 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: activerecord
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ">="
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '6.0'
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ">="
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '6.0'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: aws-sdk-athena
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '1.0'
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '1.0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: aws-sdk-s3
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - "~>"
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '1.0'
|
48
|
+
type: :runtime
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - "~>"
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '1.0'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: rspec
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - "~>"
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '3.0'
|
62
|
+
type: :development
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - "~>"
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '3.0'
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: rake
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - "~>"
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: '13.0'
|
76
|
+
type: :development
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - "~>"
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '13.0'
|
83
|
+
- !ruby/object:Gem::Dependency
|
84
|
+
name: nokogiri
|
85
|
+
requirement: !ruby/object:Gem::Requirement
|
86
|
+
requirements:
|
87
|
+
- - "~>"
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: '1.0'
|
90
|
+
type: :development
|
91
|
+
prerelease: false
|
92
|
+
version_requirements: !ruby/object:Gem::Requirement
|
93
|
+
requirements:
|
94
|
+
- - "~>"
|
95
|
+
- !ruby/object:Gem::Version
|
96
|
+
version: '1.0'
|
97
|
+
- !ruby/object:Gem::Dependency
|
98
|
+
name: simplecov
|
99
|
+
requirement: !ruby/object:Gem::Requirement
|
100
|
+
requirements:
|
101
|
+
- - "~>"
|
102
|
+
- !ruby/object:Gem::Version
|
103
|
+
version: '0.22'
|
104
|
+
type: :development
|
105
|
+
prerelease: false
|
106
|
+
version_requirements: !ruby/object:Gem::Requirement
|
107
|
+
requirements:
|
108
|
+
- - "~>"
|
109
|
+
- !ruby/object:Gem::Version
|
110
|
+
version: '0.22'
|
111
|
+
description: An ActiveRecord adapter that enables Rails applications to connect to
|
112
|
+
and query AWS Athena
|
113
|
+
email:
|
114
|
+
- jchinkle@gmail.com
|
115
|
+
executables: []
|
116
|
+
extensions: []
|
117
|
+
extra_rdoc_files: []
|
118
|
+
files:
|
119
|
+
- LICENSE
|
120
|
+
- README.md
|
121
|
+
- lib/active_record/connection_adapters/athena/database_tasks.rb
|
122
|
+
- lib/active_record/connection_adapters/athena/schema_dumper.rb
|
123
|
+
- lib/active_record/connection_adapters/athena/schema_statements.rb
|
124
|
+
- lib/active_record/connection_adapters/athena_adapter.rb
|
125
|
+
- lib/activerecord_athena.rb
|
126
|
+
- lib/activerecord_athena/version.rb
|
127
|
+
homepage: https://github.com/jchinkle/activerecord_athena
|
128
|
+
licenses:
|
129
|
+
- MIT
|
130
|
+
metadata: {}
|
131
|
+
post_install_message:
|
132
|
+
rdoc_options: []
|
133
|
+
require_paths:
|
134
|
+
- lib
|
135
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
136
|
+
requirements:
|
137
|
+
- - ">="
|
138
|
+
- !ruby/object:Gem::Version
|
139
|
+
version: 2.7.0
|
140
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
141
|
+
requirements:
|
142
|
+
- - ">="
|
143
|
+
- !ruby/object:Gem::Version
|
144
|
+
version: '0'
|
145
|
+
requirements: []
|
146
|
+
rubygems_version: 3.5.11
|
147
|
+
signing_key:
|
148
|
+
specification_version: 4
|
149
|
+
summary: ActiveRecord adapter for AWS Athena
|
150
|
+
test_files: []
|