pg_column_byte_packer 1.0.0 → 1.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.ruby-version +1 -1
- data/README.md +2 -2
- data/lib/pg_column_byte_packer.rb +100 -17
- data/lib/pg_column_byte_packer/version.rb +1 -1
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 8ea8169b533cd62fa0b0424f6a45bf27458fe2046bd4788a87733ac768a069dc
|
4
|
+
data.tar.gz: 3a9dd826059c7e874fdd5822fd6f7ca76763632b3ae943e2eb0b609734a9d260
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 7bd2c8c689484c1c55fd4bfc6b7eda5c9f333087b35ba02b6cc697dced20d9769360de66c645b450e295424543c78e1140d5b78b7b59447d95681e1981870d8d
|
7
|
+
data.tar.gz: df2f7e5bc6cc6aa8dd79746bc465854c142ba6ede2e0109021cd1b64bcc216891cd078ab20839950cae668c89ad8d46cad04f53389f2b309a41f496e7c730496
|
data/.ruby-version
CHANGED
@@ -1 +1 @@
|
|
1
|
-
2.
|
1
|
+
ruby-2.5.7
|
data/README.md
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
# PostgreSQL Column Byte Packer
|
2
2
|
|
3
|
-
[![Build Status](https://travis-ci.
|
3
|
+
[![Build Status](https://travis-ci.com/braintree/pg_column_byte_packer.svg?branch=master)](https://travis-ci.org/braintree/pg_column_byte_packer/)
|
4
4
|
|
5
5
|
tl;dr: Provides facilities for laying out table column order to optimize for disk space usage both in ActiveRecord migrations and `pg_dump` generated SQL schema files. The general idea and relevant PostgreSQL internals are described in [On Rocks and Sand](https://www.2ndquadrant.com/en/blog/on-rocks-and-sand/).
|
6
6
|
|
@@ -51,7 +51,7 @@ After checking out the repo, run `bin/setup` to install dependencies. Then, run
|
|
51
51
|
|
52
52
|
Running tests will automatically create a test database in the locally running Postgres server. You can find the connection parameters in `spec/spec_helper.rb`, but setting the environment variables `PGHOST`, `PGPORT`, `PGUSER`, and `PGPASSWORD` will override the defaults.
|
53
53
|
|
54
|
-
To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
|
54
|
+
To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org). To ignore local changes (say to `.ruby-version`) you can do `rake build release:source_control_push release:rubygem_push`.
|
55
55
|
|
56
56
|
## License
|
57
57
|
|
@@ -29,6 +29,37 @@ module PgColumnBytePacker
|
|
29
29
|
end
|
30
30
|
end
|
31
31
|
|
32
|
+
if type_schema == "pg_catalog"
|
33
|
+
sql_type = case sql_type
|
34
|
+
when "char", "\"char\""
|
35
|
+
# The SQL standard defines bare 'character' as a single character
|
36
|
+
# type. The quoted variant is also single character, but raises
|
37
|
+
# an error on inputs longer than a single character (unlike the SQL standard,
|
38
|
+
# which requires it to be silently truncated).
|
39
|
+
#
|
40
|
+
# We don't yet use the length for the character type, but it seems
|
41
|
+
# worth retaining it here for completeness (rather than solely
|
42
|
+
# handling the quoted "char" case, which is what we have to have
|
43
|
+
# for things to work minimally).
|
44
|
+
#
|
45
|
+
# See: https://www.postgresql.org/docs/current/datatype-character.html
|
46
|
+
# and format_type.c
|
47
|
+
"character(1)"
|
48
|
+
when "bit"
|
49
|
+
# The SQL standard defines bare 'bit' as a single bit type.
|
50
|
+
#
|
51
|
+
# See: https://www.postgresql.org/docs/current/datatype-bit.html
|
52
|
+
"bit(1)"
|
53
|
+
when "\"bit\""
|
54
|
+
# The quoted variant, instead of being parallel to the quoted
|
55
|
+
# variant of char, allows any number of bits, but is technically
|
56
|
+
# a different entry in pg_type than varbit.
|
57
|
+
"bit"
|
58
|
+
else
|
59
|
+
sql_type
|
60
|
+
end
|
61
|
+
end
|
62
|
+
|
32
63
|
bare_type = if type_schema
|
33
64
|
if sql_type.start_with?("#{type_schema}.")
|
34
65
|
sql_type.sub("#{type_schema}.", "")
|
@@ -58,9 +89,16 @@ module PgColumnBytePacker
|
|
58
89
|
# doesn't match the canonical name in pg_type; e.g., "float8" is
|
59
90
|
# canonical, but pgdump outputs "double precision".
|
60
91
|
case bare_type
|
61
|
-
when "bigint", "double precision", /\Atimestamp
|
92
|
+
when "bigint", "double precision", /\Atimestamp(tz| with time zone| without time zone)?/, /\Abigserial( primary key)?/
|
62
93
|
8 # Actual alignment for these types.
|
63
|
-
when "integer", "date",
|
94
|
+
when "integer", "date", /\Atime(tz| with time zone| without time zone)?/, /\Adecimal(\([^\)]+\))?/, "real", /\Aserial( primary key)?/
|
95
|
+
# Note: unlike the others which always take a fixed amount of space,
|
96
|
+
# the numeric/decimal type is stored in a variable amount of space (see:
|
97
|
+
# https://www.postgresql.org/docs/10/datatype-numeric.html) but pg_type
|
98
|
+
# shows that its alignment is the same as integer. Postgres canonicalizes
|
99
|
+
# this type to numeric, but we have to still support the decimal
|
100
|
+
# designation for ActiveRecord inputs.
|
101
|
+
|
64
102
|
4 # Actual alignment for these types.
|
65
103
|
when "bytea"
|
66
104
|
# These types generally have an alignment of 4, but values of at most 127 bytes
|
@@ -68,20 +106,46 @@ module PgColumnBytePacker
|
|
68
106
|
# Since we'd expect any binary fields to be relatively long, we'll assume they
|
69
107
|
# won't fit into the optimized case.
|
70
108
|
4
|
71
|
-
when "text", "citext", "character varying"
|
72
|
-
# These types generally have an alignment of 4
|
73
|
-
#
|
74
|
-
#
|
75
|
-
#
|
76
|
-
#
|
77
|
-
#
|
109
|
+
when "text", "citext", "character varying", "bit varying", "bit"
|
110
|
+
# These types generally have an alignment of 4 (as designated by pg_type
|
111
|
+
# having a typalign value of 'i'), but they're special in that small values
|
112
|
+
# have an optimized storage layout. Beyond the optimized storage layout, though,
|
113
|
+
# these small values also are not required to respect the alignment the type
|
114
|
+
# would otherwise have. Specifically, values with a size of at most 127 bytes
|
115
|
+
# aren't aligned. That 127 byte cap, however, includes an overhead byte to store
|
116
|
+
# the length, and so in reality the max is 126 bytes. Interestingly TOASTable
|
117
|
+
# values are also treated that way, but we don't have a good way of knowing which
|
118
|
+
# values those will be.
|
119
|
+
#
|
120
|
+
# See: `fill_val()` in src/backend/access/common/heaptuple.c (in the conditional
|
121
|
+
# `else if (att->attlen == -1)` branch).
|
122
|
+
#
|
123
|
+
# When no limit modifier has been applied we don't have a good heuristic for
|
124
|
+
# determining which columns are likely to be long or short, so we currently
|
125
|
+
# just slot them all after the columns we believe will always be long.
|
78
126
|
3
|
79
127
|
when /\Acharacter varying\(\d+\)/
|
128
|
+
# However, when a limit modifier has been applied we can make stronger assumptions.
|
80
129
|
if (limit = /\Acharacter varying\((\d+)\)/.match(sql_type)[1])
|
81
|
-
if limit.to_i <=
|
82
|
-
|
130
|
+
if limit.to_i <= 126
|
131
|
+
# If we know the limit guarantees we'll fit into the unaligned storage
|
132
|
+
# optimization, then we can go ahead and treat it as unaligned.
|
133
|
+
1
|
83
134
|
else
|
84
|
-
|
135
|
+
# If the limit would allow strings that require alignment, then we assume it's
|
136
|
+
# more likely to exceed the optimization cap and slot them after the columns
|
137
|
+
# we know for certain will require integer alignment.
|
138
|
+
3
|
139
|
+
end
|
140
|
+
end
|
141
|
+
when /\Abit varying\(\d+\)/
|
142
|
+
# This type is functionally the same as varchar above, but the calculation we need
|
143
|
+
# to do has been scaled since the limit is expressed in bits rather than bytes.
|
144
|
+
if (limit = /\Abit varying\((\d+)\)/.match(sql_type)[1])
|
145
|
+
if limit.to_i <= (126 * 8)
|
146
|
+
1
|
147
|
+
else
|
148
|
+
3
|
85
149
|
end
|
86
150
|
end
|
87
151
|
when /\Afloat(\(\d+\))?/
|
@@ -98,14 +162,30 @@ module PgColumnBytePacker
|
|
98
162
|
else
|
99
163
|
8 # Default is double precision
|
100
164
|
end
|
101
|
-
when "smallint", "boolean"
|
102
|
-
2 # Actual alignment for these types.
|
103
165
|
else
|
166
|
+
type_without_modifier, modifier = bare_type.match(/\A([^\(]+)(\([^\)]+\))?/)[1..-1]
|
167
|
+
|
168
|
+
pg_type_typname = case type_without_modifier
|
169
|
+
when "boolean"
|
170
|
+
"bool"
|
171
|
+
when "smallint"
|
172
|
+
"int2"
|
173
|
+
when "character"
|
174
|
+
"char"
|
175
|
+
else
|
176
|
+
# There are other cases you might expect to see here (for other system
|
177
|
+
# types like varchar and varbit where the external name generated by
|
178
|
+
# format_type_extended() in Postgres's format_type.c doesn't match the
|
179
|
+
# pg_type typname), but we've already handled them separately above
|
180
|
+
# (since we have additional rules to apply to them).
|
181
|
+
type_without_modifier
|
182
|
+
end
|
183
|
+
|
104
184
|
typtype, typalign = connection.select_rows(<<~SQL, "Type Lookup").first
|
105
185
|
SELECT typ.typtype, typ.typalign
|
106
186
|
FROM pg_type typ
|
107
187
|
JOIN pg_namespace nsp ON nsp.oid = typ.typnamespace
|
108
|
-
WHERE typname = '#{connection.quote_string(
|
188
|
+
WHERE typname = '#{connection.quote_string(pg_type_typname)}'
|
109
189
|
#{type_schema ? "AND nsp.nspname = '#{connection.quote_string(type_schema)}'" : ""}
|
110
190
|
SQL
|
111
191
|
|
@@ -118,7 +198,10 @@ module PgColumnBytePacker
|
|
118
198
|
else
|
119
199
|
case typalign
|
120
200
|
when "c"
|
121
|
-
|
201
|
+
# Character types, for example, occupy a variable amount of space
|
202
|
+
# (though fixed in the sense that it's specified up front for each
|
203
|
+
# column definition) but require no alignment.
|
204
|
+
1
|
122
205
|
when "s"
|
123
206
|
2
|
124
207
|
when "i"
|
@@ -126,7 +209,7 @@ module PgColumnBytePacker
|
|
126
209
|
when "d"
|
127
210
|
8
|
128
211
|
else
|
129
|
-
|
212
|
+
1
|
130
213
|
end
|
131
214
|
end
|
132
215
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: pg_column_byte_packer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.
|
4
|
+
version: 1.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- jcoleman
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-
|
11
|
+
date: 2020-12-01 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake
|
@@ -203,7 +203,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
203
203
|
- !ruby/object:Gem::Version
|
204
204
|
version: '0'
|
205
205
|
requirements: []
|
206
|
-
rubygems_version: 3.
|
206
|
+
rubygems_version: 3.0.8
|
207
207
|
signing_key:
|
208
208
|
specification_version: 4
|
209
209
|
summary: Auto-order table columns to optimize disk space usage
|