egis 1.5.0 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 924727f52ac809df7ab4457a6a0c05469cd6c128adaff339018e964dc9ff4ea0
4
- data.tar.gz: c8ba9d090641c899e832b73b062314bd76fa4fb39041fb175d7e3a717e943856
3
+ metadata.gz: ca7464c31cc32115edb77470e2d5f2abe99ab9f4a64eef2ae0820ed8d5b982b7
4
+ data.tar.gz: 8002e611e1c59635365a3ca2ff54191e1a9671f4b5498c940438c85b7ea45d28
5
5
  SHA512:
6
- metadata.gz: fa429f550a68475a75f9dd9c0610b0d41c9a8017f0e46c0f9e467cfaeddfb7d6587d6e7df5b4440bbd75e4d4d9a12d5966f40e4ce72cb8abf70e5dc2897f91d8
7
- data.tar.gz: 3beac29ce82c267bfc2621e730a8b631f76a94f7426ac7d7901b31d204cda3320d1ab934e9c3e156ee52e5fca315728412ffd5a007bf4623dd2ac077d85399ba
6
+ metadata.gz: 5971ac98ab6bfdcbcac23a4b9e0e2b8b31b14cb06987ea6f3891fc0c1787087ebfca85eae026c8cb1c2952f11e4659cf36827492b8d9fdf7d43d3a6943d3bd7f
7
+ data.tar.gz: 98d379a3ddb1b3796b08e9c5a4cc584826324fa03e24d64ddb604405ec57a6f0d75a3d2138cf9123f154e275b5202c592814c4cea6a00049d75eda6ec5d2de8b
data/lib/egis/database.rb CHANGED
@@ -28,8 +28,7 @@ module Egis
28
28
  # @param [String] table_name
29
29
  # @param [Egis::TableSchema] table_schema
30
30
  # @param [String] table_location S3 URL with table location (e.g. `s3://s3_bucket/table/location/`)
31
- # @param [:tsv, :csv, :orc, {serde: 'SerdeClass', serde_properties: {property: value}}] format Table format
32
- # (defaults to :tsv)
31
+ # @param [:tsv, :csv, :orc, :orc_index_access, :json, String] format Table Format (defaults to :tsv)
33
32
  # @return [Egis::Table]
34
33
 
35
34
  def table(table_name, table_schema, table_location, **options)
data/lib/egis/table.rb CHANGED
@@ -142,7 +142,6 @@ module Egis
142
142
 
143
143
  ##
144
144
  # @return Table data format
145
-
146
145
  def format
147
146
  options.fetch(:format)
148
147
  end
@@ -9,7 +9,7 @@ module Egis
9
9
  #{column_definition_sql(table.schema.columns)}
10
10
  )
11
11
  #{partition_statement(table.schema)}
12
- #{row_format_statement(table.format)}
12
+ #{format_statement(table.format)}
13
13
  LOCATION '#{table.location}';
14
14
  SQL
15
15
  end
@@ -34,37 +34,31 @@ module Egis
34
34
  columns.map { |column| "`#{column.name}` #{column.type}" }.join(",\n")
35
35
  end
36
36
 
37
- def serde?(format)
38
- format.is_a?(Hash) && format.key?(:serde)
39
- end
40
-
41
- def row_format_statement(format)
42
- return serde_row_format_statement(format) if serde?(format)
43
-
44
- delimited_row_format_statement(format)
45
- end
37
+ def format_statement(format)
38
+ return format if format.is_a?(String)
46
39
 
47
- def serde_row_format_statement(format)
48
- row_format = "ROW FORMAT SERDE '#{format[:serde]}'"
49
- return row_format unless format.key?(:serde_properties)
50
-
51
- serde_properties = format[:serde_properties].map { |property, value| "'#{property}' = '#{value}'" }
52
- <<-SQL
53
- #{row_format}
54
- WITH SERDEPROPERTIES (
55
- #{serde_properties.join(",\n")}
56
- )
57
- SQL
40
+ format_preset(format)
58
41
  end
59
42
 
60
- def delimited_row_format_statement(format)
43
+ def format_preset(format) # rubocop:disable Metrics/MethodLength
61
44
  case format
62
45
  when :csv
63
46
  "ROW FORMAT DELIMITED FIELDS TERMINATED BY ','"
64
47
  when :tsv
65
48
  "ROW FORMAT DELIMITED FIELDS TERMINATED BY '\\t'"
66
49
  when :orc
50
+ <<~SQL
51
+ ROW FORMAT SERDE 'org.apache.hadoop.hive.ql.io.orc.OrcSerde'
52
+ WITH SERDEPROPERTIES (
53
+ 'orc.column.index.access' = 'false'
54
+ )
55
+ STORED AS INPUTFORMAT 'org.apache.hadoop.hive.ql.io.orc.OrcInputFormat'
56
+ OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat'
57
+ SQL
58
+ when :orc_index_access
67
59
  'STORED AS ORC'
60
+ when :json
61
+ "ROW FORMAT SERDE 'org.apache.hive.hcatalog.data.JsonSerDe'"
68
62
  else
69
63
  raise Errors::UnsupportedTableFormat, format.to_s
70
64
  end
data/lib/egis/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Egis
4
- VERSION = '1.5.0'
4
+ VERSION = '2.0.0'
5
5
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: egis
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.5.0
4
+ version: 2.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Agnieszka Czereba
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2021-05-14 00:00:00.000000000 Z
12
+ date: 2021-12-16 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: aws-sdk-athena