egis 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/egis.gemspec +30 -0
- data/lib/egis/aws_client_provider.rb +30 -0
- data/lib/egis/cartesian_product_generator.rb +17 -0
- data/lib/egis/client.rb +142 -0
- data/lib/egis/configuration.rb +9 -0
- data/lib/egis/database.rb +102 -0
- data/lib/egis/errors.rb +12 -0
- data/lib/egis/output_downloader.rb +21 -0
- data/lib/egis/output_parser.rb +24 -0
- data/lib/egis/partitions_generator.rb +55 -0
- data/lib/egis/query_output_location.rb +13 -0
- data/lib/egis/query_status.rb +76 -0
- data/lib/egis/s3_cleaner.rb +22 -0
- data/lib/egis/s3_location_parser.rb +14 -0
- data/lib/egis/standard_mode.rb +18 -0
- data/lib/egis/table.rb +163 -0
- data/lib/egis/table_data_wiper.rb +51 -0
- data/lib/egis/table_ddl_generator.rb +50 -0
- data/lib/egis/table_schema.rb +49 -0
- data/lib/egis/testing/testing_mode.rb +62 -0
- data/lib/egis/testing.rb +48 -0
- data/lib/egis/types/boolean_serializer.rb +53 -0
- data/lib/egis/types/default_serializer.rb +20 -0
- data/lib/egis/types/integer_serializer.rb +20 -0
- data/lib/egis/types/null_serializer.rb +36 -0
- data/lib/egis/types/string_serializer.rb +20 -0
- data/lib/egis/types/timestamp_serializer.rb +22 -0
- data/lib/egis/types.rb +30 -0
- data/lib/egis/version.rb +5 -0
- data/lib/egis.rb +62 -0
- metadata +106 -0
    
        data/lib/egis/table.rb
    ADDED
    
    | @@ -0,0 +1,163 @@ | |
| 1 | 
            +
            # frozen_string_literal: true
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            module Egis
              ##
              # Interface for Athena table manipulation.
              #
              # It is recommended to create table objects using {Egis::Database#table} method.
              #
              # @!attribute [r] database
              #   @return [Egis::Database]
              # @!attribute [r] name
              #   @return [String] Athena table name
              # @!attribute [r] schema
              #   @return [Egis::TableSchema] table's schema object
              #
              class Table
                DEFAULT_OPTIONS = {format: :tsv}.freeze

                ##
                # @param [Egis::Database] database database used to execute the table's queries
                # @param [String] name table name
                # @param [Egis::TableSchema] schema table's schema object
                # @param [String] location table location URL; it is passed through the current Egis mode when read
                # @param [Hash] options table options, merged over {DEFAULT_OPTIONS}
                #
                # The remaining keyword arguments are collaborators that can be replaced (e.g. in tests).

                def initialize(database, name, schema, location, options: {},
                               partitions_generator: Egis::PartitionsGenerator.new,
                               table_ddl_generator: Egis::TableDDLGenerator.new,
                               output_downloader: Egis::OutputDownloader.new,
                               output_parser: Egis::OutputParser.new,
                               table_data_wiper: Egis::TableDataWiper.new)
                  @database = database
                  @name = name
                  @schema = schema
                  @location = location
                  @options = DEFAULT_OPTIONS.merge(options)
                  @partitions_generator = partitions_generator
                  @table_ddl_generator = table_ddl_generator
                  @output_downloader = output_downloader
                  @output_parser = output_parser
                  @table_data_wiper = table_data_wiper
                end

                attr_reader :database, :name, :schema

                ##
                # Creates table in Athena.
                #
                # @return [void]

                def create
                  create_table_sql = table_ddl_generator.create_table_sql(self, permissive: true)
                  database.execute_query(create_table_sql, async: false)
                end

                ##
                # The same as {#create} but raising error when table with a given name already exists.
                #
                # @return [void]

                def create!
                  create_table_sql = table_ddl_generator.create_table_sql(self, permissive: false)
                  database.execute_query(create_table_sql, async: false)
                end

                ##
                # Creates partitions with all possible combinations of given partition values.
                #
                # @example
                #   table.add_partitions(year: [2000, 2001], type: ['user'])
                #
                # @param [Hash] partitions
                # @return [void]

                def add_partitions(partitions)
                  load_partitions_query = partitions_generator.to_sql(name, partitions, permissive: true)
                  database.execute_query(load_partitions_query, async: false)
                end

                ##
                # (see #add_partitions)
                # It raises error when a partition already exists.

                def add_partitions!(partitions)
                  load_partitions_query = partitions_generator.to_sql(name, partitions, permissive: false)
                  database.execute_query(load_partitions_query, async: false)
                end

                ##
                # Tells Athena to automatically discover table's partitions by scanning table's S3 location.
                # This operation might take long time with big number of partitions. If that's the case, instead of this method use
                # {#add_partitions} to define partitions manually.
                #
                # @return [void]

                def discover_partitions
                  database.execute_query("MSCK REPAIR TABLE #{name};", async: false)
                end

                ##
                # Insert data into the table. Mostly useful for testing purposes.
                #
                # Every row must provide one value per schema column followed by one value per partition column.
                # Nothing is executed when there are no rows to insert.
                #
                # @param [Array] rows Array of arrays with row values
                # @raise [ArgumentError] when a row has a different number of values than the table has columns
                # @return [void]

                def upload_data(rows)
                  rows = rows.to_a
                  # An INSERT statement with an empty VALUES list is not valid SQL, so there is nothing to run.
                  return if rows.empty?

                  query = data_insert_query(rows)
                  database.execute_query(query, async: false)
                end

                ##
                # Downloads table contents into memory. Mostly useful for testing purposes.
                #
                # @return [Array] Array of arrays with row values.

                def download_data
                  result = database.execute_query("SELECT * FROM #{name};", async: false)
                  content = output_downloader.download(result.output_location)
                  output_parser.parse(content, column_types)
                end

                ##
                # Removes table's content on S3. Optionally, you can limit files removed to specific partitions.
                #
                # @param [Hash] partitions Partitions values to remove. Follows the same argument format as {#add_partitions}.
                # @return [void]

                def wipe_data(partitions: nil)
                  table_data_wiper.wipe_table_data(self, partitions)
                end

                ##
                # @return Table data format

                def format
                  options.fetch(:format)
                end

                ##
                # @return [String] table location URL

                def location
                  Egis.mode.s3_path(@location)
                end

                private

                attr_reader :options, :partitions_generator, :table_ddl_generator, :output_downloader, :output_parser,
                            :table_data_wiper

                # One serializer per column, in the same order as {#column_types}.
                def column_serializers
                  @column_serializers ||= column_types.map { |type| Egis::Types.serializer(type) }
                end

                # Types of regular columns followed by types of partition columns.
                def column_types
                  (schema.columns + schema.partitions).map(&:type)
                end

                # Builds a single INSERT statement containing all given rows.
                def data_insert_query(rows)
                  <<~SQL
                    INSERT INTO #{name} VALUES
                    #{rows.map { |row| row_values_statement(row) }.join(",\n")};
                  SQL
                end

                # Renders one row as a parenthesized list of SQL literals.
                def row_values_statement(row)
                  # Fail early with a clear message instead of calling a missing serializer or emitting malformed SQL.
                  unless row.size == column_serializers.size
                    raise ArgumentError,
                          "Expected #{column_serializers.size} values per row but got #{row.size}: #{row.inspect}"
                  end

                  "(#{row.zip(column_serializers).map { |value, serializer| serializer.literal(value) }.join(', ')})"
                end
              end
            end
         | 
| @@ -0,0 +1,51 @@ | |
| 1 | 
            +
            # frozen_string_literal: true
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            module Egis
              # @!visibility private
              #
              # Removes a table's files from S3: either everything under the table location or only the files
              # belonging to the given partition values.
              class TableDataWiper
                def initialize(s3_location_parser: Egis::S3LocationParser.new,
                               s3_cleaner: Egis::S3Cleaner.new,
                               cartesian_product_generator: Egis::CartesianProductGenerator.new)
                  @s3_location_parser = s3_location_parser
                  @s3_cleaner = s3_cleaner
                  @cartesian_product_generator = cartesian_product_generator
                end

                # @param table table object responding to +location+ and +schema+
                # @param [Hash, nil] partitions partition name => list of values; +nil+ wipes the whole table location
                # @raise [Egis::Errors::PartitionError] when no leading table partition is given or a used partition
                #   has an empty list of values
                def wipe_table_data(table, partitions)
                  bucket, location = s3_location_parser.parse_url(table.location)

                  return s3_cleaner.delete(bucket, location) unless partitions

                  partition_values_to_remove = partition_values_to_remove(table, partitions)

                  validate_partition_values(partition_values_to_remove, partitions)

                  remove_partition_files(bucket, location, partition_values_to_remove)
                end

                private

                attr_reader :s3_location_parser, :s3_cleaner, :cartesian_product_generator

                # Picks the given partitions following the table's partition order and stops at the first table
                # partition that was not given, so only an unbroken leading run of partitions is used.
                def partition_values_to_remove(table, partitions)
                  table_partitions = table.schema.partitions.map(&:name)
                  given_partitions = partitions.keys

                  partitions_to_delete = table_partitions.take_while { |partition| given_partitions.include?(partition) }
                  partitions_to_delete.map { |partition_name| [partition_name, partitions.fetch(partition_name)] }.to_h
                end

                # Rejects requests that resolve to no partition at all or to a partition without any value.
                def validate_partition_values(removed_partition_values, partitions)
                  return unless removed_partition_values.empty? || removed_partition_values.values.any?(&:empty?)

                  raise Egis::Errors::PartitionError, "Incorrect partitions given: #{partitions}"
                end

                # Deletes the files of every combination of the given partition values.
                def remove_partition_files(bucket, location, partitions_with_values)
                  cartesian_product_generator.cartesian_product(partitions_with_values).each do |partition_value_set|
                    partition_prefix = partition_value_set.map { |name_value| name_value.join('=') }.join('/')
                    # The trailing slash closes the last path segment. Without it a prefix such as "year=1" is also
                    # a prefix of "year=10", so sibling partitions could be removed as well.
                    # NOTE(review): assumes s3_cleaner.delete removes every key starting with the prefix - confirm.
                    s3_cleaner.delete(bucket, "#{location}/#{partition_prefix}/")
                  end
                end
              end
            end
         | 
| @@ -0,0 +1,50 @@ | |
| 1 | 
            +
            # frozen_string_literal: true
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            module Egis
              # @!visibility private
              #
              # Builds the CREATE EXTERNAL TABLE statement for a table object.
              class TableDDLGenerator
                # Storage clause for each supported table format.
                FORMAT_STATEMENTS = {
                  csv: "ROW FORMAT DELIMITED FIELDS TERMINATED BY ','",
                  tsv: "ROW FORMAT DELIMITED FIELDS TERMINATED BY '\\t'",
                  orc: 'STORED AS ORC'
                }.freeze
                private_constant :FORMAT_STATEMENTS

                # @param table object responding to +name+, +schema+, +format+ and +location+
                # @param [Boolean] permissive when true, "IF NOT EXISTS" is added to the statement
                # @return [String] the DDL statement, one clause per line, ending with a newline
                # @raise [Egis::Errors::UnsupportedTableFormat] when the table format is not csv, tsv or orc
                def create_table_sql(table, permissive: false)
                  table_schema = table.schema
                  statement_lines = [
                    "CREATE EXTERNAL TABLE #{permissive_statement(permissive)}#{table.name} (",
                    "  #{column_definition_sql(table_schema.columns)}",
                    ')',
                    partition_statement(table_schema),
                    format_statement(table.format),
                    "LOCATION '#{table.location}';"
                  ]

                  # Every entry, including an absent partition clause, occupies its own line.
                  statement_lines.map { |line| "#{line}\n" }.join
                end

                private

                # Clause that makes table creation a no-op when the table already exists.
                def permissive_statement(permissive_flag)
                  permissive_flag ? 'IF NOT EXISTS ' : nil
                end

                # PARTITIONED BY clause, or nil for a schema without partitions.
                def partition_statement(table_schema)
                  partition_columns = table_schema.partitions
                  return if partition_columns.empty?

                  "PARTITIONED BY (\n  #{column_definition_sql(partition_columns)}\n)\n"
                end

                # Comma-separated "`name` type" definitions, one per line.
                def column_definition_sql(columns)
                  definitions = columns.map do |definition|
                    "`#{definition.name}` #{definition.type}"
                  end

                  definitions.join(",\n")
                end

                # Looks up the storage clause for the given format.
                def format_statement(format)
                  FORMAT_STATEMENTS.fetch(format) { raise Errors::UnsupportedTableFormat, format.to_s }
                end
              end
            end
         | 
| @@ -0,0 +1,49 @@ | |
| 1 | 
            +
            # frozen_string_literal: true
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            module Egis
              ##
              # Provides DSL for defining table schemas.
              #
              # @example Table schema definition
              #   schema = Egis::TableSchema.define do
              #     column :id, :int
              #     column :message, :string
              #
              #     partition :country, :string
              #     partition :type, :int
              #   end
              #
              # @!attribute [r] columns
              #   @return [Array<Egis::TableSchema::Column>] columns in definition order
              # @!attribute [r] partitions
              #   @return [Array<Egis::TableSchema::Column>] partition columns in definition order
              #
              class TableSchema
                ##
                # @yield schema definition evaluated in the context of the new schema object
                # @raise [ArgumentError] when no block is given
                # @return [Egis::TableSchema]

                def self.define(&block)
                  new(&block)
                end

                ##
                # @yield schema definition evaluated in the context of the new schema object
                # @raise [ArgumentError] when no block is given

                def initialize(&block)
                  # Without a block instance_eval fails with an unrelated "wrong number of arguments" message,
                  # so the missing definition is reported explicitly (same exception class as before).
                  raise ArgumentError, 'schema definition block is required' unless block

                  @columns = []
                  @partitions = []
                  instance_eval(&block)
                end

                attr_reader :columns, :partitions

                private

                # DSL method: appends a regular column to the schema.
                def column(name, type)
                  @columns << Column.new(name, type)
                end

                # DSL method: appends a partition column to the schema.
                def partition(name, type)
                  @partitions << Column.new(name, type)
                end

                # Name/type pair describing a single column. Constants are not affected by +private+ above.
                Column = Struct.new(:name, :type)
              end
            end
         | 
| @@ -0,0 +1,62 @@ | |
| 1 | 
            +
            # frozen_string_literal: true
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            module Egis
              module Testing
                # @!visibility private
                #
                # Mode object that places S3 locations and database names under a per-test identifier and
                # remembers whether it has been used, so that cleanup only runs when needed.
                class TestingMode
                  def initialize(test_id, s3_bucket,
                                 client: Egis::Client.new,
                                 output_downloader: Egis::OutputDownloader.new,
                                 s3_location_parser: Egis::S3LocationParser.new)
                    @client = client
                    @output_downloader = output_downloader
                    @s3_location_parser = s3_location_parser
                    @s3_bucket = s3_bucket
                    @test_id = test_id
                    @dirty = false
                  end

                  # Maps an S3 URL to a location inside the testing bucket, below the test identifier.
                  def s3_path(s3_url)
                    dirty!

                    original_bucket, original_key = s3_location_parser.parse_url(s3_url)

                    "s3://#{s3_bucket}/#{test_id}/#{original_bucket}/#{original_key}"
                  end

                  # Prefixes the database name with the test identifier.
                  def database_name(name)
                    dirty!

                    "#{test_id}_#{name}"
                  end

                  # Always answers false, whatever flag was requested.
                  def async(_async_flag)
                    dirty!

                    false
                  end

                  # Drops the databases of this test, but only when the mode has been used at all.
                  def cleanup
                    return unless dirty?

                    remove_test_databases
                  end

                  private

                  attr_reader :test_id, :s3_bucket, :client, :output_downloader, :s3_location_parser

                  # Lists databases matching the test identifier and drops each of them.
                  def remove_test_databases
                    listing = client.execute_query("SHOW DATABASES LIKE '#{test_id}.*';", async: false)
                    listed_databases = output_downloader.download(listing.output_location).flatten

                    listed_databases.each do |listed_database|
                      client.database(listed_database).drop
                    end
                  end

                  # Records that the mode has been used.
                  def dirty!
                    @dirty = true
                  end

                  def dirty?
                    @dirty
                  end
                end
              end
            end
         | 
    
        data/lib/egis/testing.rb
    ADDED
    
    | @@ -0,0 +1,48 @@ | |
| 1 | 
            +
            # frozen_string_literal: true
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            require 'securerandom'
         | 
| 4 | 
            +
             | 
| 5 | 
            +
            require 'egis/testing/testing_mode'
         | 
| 6 | 
            +
             | 
| 7 | 
            +
            module Egis # rubocop:disable Style/Documentation
              # @!visibility private
              module Testing
              end

              ##
              # Egis testing mode.
              # Every table and database created within method's block is mapped to a "virtual" table space in your
              # testing S3 bucket.
              # Using it, you can insert test data to your production tables and they will be simulated within the testing bucket,
              # not touching actual locations.
              #
              # The previous mode is restored and the testing mode is cleaned up when the block finishes, also when
              # it raises.
              #
              # @example RSpec configuration
              #   # spec_helper.rb
              #
              #   require 'egis/testing'
              #
              #   Egis.configure do |config|
              #     config.testing_s3_bucket = 'testing-bucket'
              #   end
              #
              #   RSpec.configure do |config|
              #     config.around(:each) do |example|
              #       Egis.testing do
              #         example.run
              #       end
              #     end
              #   end
              #
              # @return [void]

              def self.testing
                test_id = SecureRandom.hex
                test_mode = Egis::Testing::TestingMode.new(test_id, Egis.configuration.testing_s3_bucket)

                previous_mode = Egis.mode
                @mode = test_mode

                # The ensure covers only the block: if the setup above fails there is no testing mode to clean up,
                # and the original error must surface instead of a NoMethodError on nil.
                begin
                  yield
                ensure
                  @mode = previous_mode
                  test_mode.cleanup
                end
              end
            end
         | 
| @@ -0,0 +1,53 @@ | |
| 1 | 
            +
            # frozen_string_literal: true
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            module Egis
              module Types
                # Converts Ruby booleans to and from their textual forms.
                #
                # Only +true+ and +false+ are accepted by {#literal} and {#dump}, and only
                # the exact strings 'true' and 'false' by {#load}; anything else raises
                # Egis::Errors::TypeError, the same error class Egis::Types.serializer
                # uses for unsupported types.
                #
                # @!visibility private
                class BooleanSerializer
                  TRUE_LITERAL = 'TRUE'
                  FALSE_LITERAL = 'FALSE'

                  TRUE_VALUE = 'true'
                  FALSE_VALUE = 'false'

                  # @param value [Boolean]
                  # @return [String] 'TRUE' or 'FALSE' (presumably for use inside query text)
                  # @raise [Egis::Errors::TypeError] when value is neither true nor false
                  def literal(value)
                    case value
                    when true
                      TRUE_LITERAL
                    when false
                      FALSE_LITERAL
                    else
                      illegal_value_error(value)
                    end
                  end

                  # @param value [Boolean]
                  # @return [String] 'true' or 'false' (presumably the stored data form)
                  # @raise [Egis::Errors::TypeError] when value is neither true nor false
                  def dump(value)
                    case value
                    when true
                      TRUE_VALUE
                    when false
                      FALSE_VALUE
                    else
                      illegal_value_error(value)
                    end
                  end

                  # @param string [String] exactly 'true' or 'false' (case-sensitive)
                  # @return [Boolean]
                  # @raise [Egis::Errors::TypeError] for any other input
                  def load(string)
                    case string
                    when TRUE_VALUE
                      true
                    when FALSE_VALUE
                      false
                    else
                      illegal_value_error(string)
                    end
                  end

                  private

                  # Raises the library's type error. The constant is spelled through the
                  # Errors namespace: a scoped lookup such as Egis::TypeError does not fall
                  # back to the top-level ::TypeError, so it would fail with NameError
                  # unless Egis defined that constant directly.
                  def illegal_value_error(value)
                    raise Egis::Errors::TypeError, "Illegal value '#{value}' for type boolean"
                  end
                end
              end
            end
         | 
| @@ -0,0 +1,20 @@ | |
| 1 | 
            +
            # frozen_string_literal: true
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            module Egis
              module Types
                # Pass-through serializer: values are stored and read back unchanged, and
                # the literal form is the value's string form wrapped in single quotes.
                #
                # @!visibility private
                class DefaultSerializer
                  # @param value [Object] interpolated via its string form; quotes inside
                  #   the value are NOT escaped
                  # @return [String] the value surrounded by single quotes
                  def literal(value)
                    %('#{value}')
                  end

                  # @param value [Object]
                  # @return [Object] the very same object
                  def dump(value)
                    value
                  end

                  # @param string [Object]
                  # @return [Object] the very same object
                  def load(string)
                    string
                  end
                end
              end
            end
         | 
| @@ -0,0 +1,20 @@ | |
| 1 | 
            +
            # frozen_string_literal: true
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            module Egis
              module Types
                # Converts integers to their decimal string form and back.
                #
                # @!visibility private
                class IntegerSerializer
                  # @param integer [Integer]
                  # @return [String] same text as {#dump}; integers need no quoting
                  def literal(integer)
                    dump(integer)
                  end

                  # @param integer [Integer]
                  # @return [String] result of calling #to_s on the argument
                  def dump(integer)
                    integer.to_s
                  end

                  # Relies on #to_i, so for a String argument, text that does not start
                  # with a number yields 0 instead of raising.
                  #
                  # @param string [String]
                  # @return [Integer]
                  def load(string)
                    string.to_i
                  end
                end
              end
            end
         | 
| @@ -0,0 +1,36 @@ | |
| 1 | 
            +
            # frozen_string_literal: true
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            module Egis
              module Types
                # Decorator that handles +nil+ itself and hands every other value to the
                # serializer it wraps. Only +nil+ is special-cased; +false+ is delegated.
                #
                # @!visibility private
                class NullSerializer
                  NULL_LITERAL = 'NULL'

                  # @param wrapped_serializer [#literal, #dump, #load] serializer used for
                  #   all non-nil values
                  def initialize(wrapped_serializer)
                    @wrapped_serializer = wrapped_serializer
                  end

                  # @return [String] 'NULL' for nil, otherwise the wrapped serializer's literal
                  def literal(value)
                    value.nil? ? NULL_LITERAL : wrapped_serializer.literal(value)
                  end

                  # @return [Object, nil] nil for nil, otherwise the wrapped serializer's dump
                  def dump(value)
                    wrapped_serializer.dump(value) unless value.nil?
                  end

                  # @return [Object, nil] nil for nil, otherwise the wrapped serializer's load
                  def load(string)
                    wrapped_serializer.load(string) unless string.nil?
                  end

                  private

                  attr_reader :wrapped_serializer
                end
              end
            end
         | 
| @@ -0,0 +1,20 @@ | |
| 1 | 
            +
            # frozen_string_literal: true
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            module Egis
              module Types
                # Serializer for string values: data passes through unchanged, and the
                # literal form is single-quoted with embedded single quotes doubled.
                #
                # @!visibility private
                class StringSerializer
                  # @param string [String]
                  # @return [String] quoted text in which every ' has become ''
                  def literal(string)
                    escaped = string.gsub("'", "''")
                    "'#{escaped}'"
                  end

                  # @param string [String]
                  # @return [String] the same object, untouched
                  def dump(string)
                    string
                  end

                  # @param string [String]
                  # @return [String] the same object, untouched
                  def load(string)
                    string
                  end
                end
              end
            end
         | 
| @@ -0,0 +1,22 @@ | |
| 1 | 
            +
            # frozen_string_literal: true
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            # Time.strptime is provided by the 'time' standard library, not by core Time.
            # Loading it here keeps #load working no matter what else has been required.
            require 'time'

            module Egis
              module Types
                # Converts Time values to and from the '%Y-%m-%d %H:%M:%S' text form.
                #
                # The format carries no zone offset and no fractional seconds, so both are
                # dropped by {#dump} and cannot be recovered by {#load}.
                #
                # @!visibility private
                class TimestampSerializer
                  ATHENA_TIME_FORMAT = '%Y-%m-%d %H:%M:%S'

                  # @param time [#strftime]
                  # @return [String] the dumped text wrapped as timestamp '...'
                  def literal(time)
                    "timestamp '#{dump(time)}'"
                  end

                  # @param time [#strftime] formatted in whatever zone the object carries
                  # @return [String] text in ATHENA_TIME_FORMAT
                  def dump(time)
                    time.strftime(ATHENA_TIME_FORMAT)
                  end

                  # @param string [String] text in ATHENA_TIME_FORMAT
                  # @return [Time] parsed by Time.strptime; with no zone in the format the
                  #   result is interpreted in the process's local zone
                  # @raise [ArgumentError] when the text does not match the format
                  def load(string)
                    Time.strptime(string, ATHENA_TIME_FORMAT)
                  end
                end
              end
            end
         | 
    
        data/lib/egis/types.rb
    ADDED
    
    | @@ -0,0 +1,30 @@ | |
| 1 | 
            +
            # frozen_string_literal: true
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            require 'egis/types/boolean_serializer'
         | 
| 4 | 
            +
            require 'egis/types/default_serializer'
         | 
| 5 | 
            +
            require 'egis/types/integer_serializer'
         | 
| 6 | 
            +
            require 'egis/types/string_serializer'
         | 
| 7 | 
            +
            require 'egis/types/timestamp_serializer'
         | 
| 8 | 
            +
            require 'egis/types/null_serializer'
         | 
| 9 | 
            +
             | 
| 10 | 
            +
            module Egis
              # @!visibility private
              module Types
                # Builds the serializer for a column type. The type-specific serializer is
                # always wrapped in a NullSerializer.
                #
                # @param type [Symbol] one of :timestamp, :string, :int, :bigint, :boolean
                # @return [NullSerializer]
                # @raise [Errors::TypeError] when the type is not one of the above
                def self.serializer(type)
                  type_serializer = serializer_class(type).new
                  NullSerializer.new(type_serializer)
                end

                # Maps a type symbol to the serializer class that handles it.
                def self.serializer_class(type)
                  case type
                  when :timestamp then TimestampSerializer
                  when :string then StringSerializer
                  when :int, :bigint then IntegerSerializer
                  when :boolean then BooleanSerializer
                  else raise Errors::TypeError, "Unsupported type: #{type}"
                  end
                end
                private_class_method :serializer_class
              end
            end
         | 
    
        data/lib/egis/version.rb
    ADDED
    
    
    
        data/lib/egis.rb
    ADDED
    
    | @@ -0,0 +1,62 @@ | |
| 1 | 
            +
            # frozen_string_literal: true
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            require 'egis/version'
         | 
| 4 | 
            +
            require 'egis/errors'
         | 
| 5 | 
            +
            require 'egis/configuration'
         | 
| 6 | 
            +
            require 'egis/types'
         | 
| 7 | 
            +
            require 'egis/query_status'
         | 
| 8 | 
            +
            require 'egis/aws_client_provider'
         | 
| 9 | 
            +
            require 'egis/s3_cleaner'
         | 
| 10 | 
            +
            require 'egis/output_downloader'
         | 
| 11 | 
            +
            require 'egis/output_parser'
         | 
| 12 | 
            +
            require 'egis/client'
         | 
| 13 | 
            +
            require 'egis/cartesian_product_generator'
         | 
| 14 | 
            +
            require 'egis/partitions_generator'
         | 
| 15 | 
            +
            require 'egis/table_data_wiper'
         | 
| 16 | 
            +
            require 'egis/table'
         | 
| 17 | 
            +
            require 'egis/database'
         | 
| 18 | 
            +
            require 'egis/query_output_location'
         | 
| 19 | 
            +
            require 'egis/table_ddl_generator'
         | 
| 20 | 
            +
            require 'egis/table_schema'
         | 
| 21 | 
            +
            require 'egis/standard_mode'
         | 
| 22 | 
            +
            require 'egis/s3_location_parser'
         | 
| 23 | 
            +
             | 
| 24 | 
            +
            ##
         | 
| 25 | 
            +
            # Egis is configured using the Egis.configure block.
         | 
| 26 | 
            +
            #
         | 
| 27 | 
            +
            # @example Configuration using AWS access key ID and secret
         | 
| 28 | 
            +
            #   Egis.configure do |config|
         | 
| 29 | 
            +
            #     config.aws_region = 'AWS region'
         | 
| 30 | 
            +
            #     config.aws_access_key_id = 'AWS key ID'
         | 
| 31 | 
            +
            #     config.aws_secret_access_key = 'AWS secret key'
         | 
| 32 | 
            +
            #     config.work_group = 'egis-integration-testing'
         | 
| 33 | 
            +
            #   end
         | 
| 34 | 
            +
            #
         | 
| 35 | 
            +
            # If you don't specify credentials they will be looked up in the default locations. For more information see
         | 
| 36 | 
            +
            # {https://docs.aws.amazon.com/sdk-for-ruby/v3/developer-guide/setup-config.html}
         | 
| 37 | 
            +
            #
         | 
| 38 | 
            +
            # @example Use specific credentials profile from `~/.aws/credentials`
         | 
| 39 | 
            +
            #   Egis.configure do |config|
         | 
| 40 | 
            +
            #     config.aws_profile = 'my-profile'
         | 
| 41 | 
            +
            #   end
         | 
| 42 | 
            +
            #
         | 
| 43 | 
            +
            # @yield [Egis::Configuration]
         | 
| 44 | 
            +
            # @return [void]
         | 
| 45 | 
            +
            #
         | 
| 46 | 
            +
            module Egis
              # Yields the shared configuration object so the caller can set options on it.
              # Returns whatever the block returns; calling it without a block raises
              # LocalJumpError.
              def self.configure
                yield configuration
              end

              # Lazily created configuration shared by the whole process.
              #
              # @!visibility private
              def self.configuration
                @configuration ||= Configuration.new
              end

              # Currently active mode; a StandardMode is created on first access when no
              # mode has been set.
              #
              # @!visibility private
              def self.mode
                @mode ||= Egis::StandardMode.new
              end
            end
         |