pg_column_byte_packer 1.0.0 → 1.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 0b3b506f62b7b77b22577939d71057487ac9577573927e31fe9ae88b03e18a24
4
- data.tar.gz: 54b478a7824de37ade407f615f25f246014d61ec311087bba86779d71d6cb237
3
+ metadata.gz: 8ea8169b533cd62fa0b0424f6a45bf27458fe2046bd4788a87733ac768a069dc
4
+ data.tar.gz: 3a9dd826059c7e874fdd5822fd6f7ca76763632b3ae943e2eb0b609734a9d260
5
5
  SHA512:
6
- metadata.gz: 8a71b91c4644867ee0462da1d4f08321ddc4aeb409e0910a074ade6fb881338e3445c94c22a0c95fbab21a9c8617f24006404cf262cf8ab1c806076a212a4f05
7
- data.tar.gz: 3b752736a44949477bc6d65f41a58901a8ac52d4b8b0637a84d0eeb247b23a2067d39c6a939f7c0a2c69acf0dad5f28066e3e99ff76079dc72f9d87c09de81a4
6
+ metadata.gz: 7bd2c8c689484c1c55fd4bfc6b7eda5c9f333087b35ba02b6cc697dced20d9769360de66c645b450e295424543c78e1140d5b78b7b59447d95681e1981870d8d
7
+ data.tar.gz: df2f7e5bc6cc6aa8dd79746bc465854c142ba6ede2e0109021cd1b64bcc216891cd078ab20839950cae668c89ad8d46cad04f53389f2b309a41f496e7c730496
@@ -1 +1 @@
1
- 2.6.0
1
+ ruby-2.5.7
data/README.md CHANGED
@@ -1,6 +1,6 @@
1
1
  # PostgreSQL Column Byte Packer
2
2
 
3
- [![Build Status](https://travis-ci.org/braintree/pg_column_byte_packer.svg?branch=master)](https://travis-ci.org/braintree/pg_column_byte_packer/)
3
+ [![Build Status](https://travis-ci.com/braintree/pg_column_byte_packer.svg?branch=master)](https://travis-ci.org/braintree/pg_column_byte_packer/)
4
4
 
5
5
  tl;dr: Provides facilities for laying out table column order to optimize for disk space usage both in ActiveRecord migrations and `pg_dump` generated SQL schema files. The general idea and relevant PostgreSQL internals are described in [On Rocks and Sand](https://www.2ndquadrant.com/en/blog/on-rocks-and-sand/).
6
6
 
@@ -51,7 +51,7 @@ After checking out the repo, run `bin/setup` to install dependencies. Then, run
51
51
 
52
52
  Running tests will automatically create a test database in the locally running Postgres server. You can find the connection parameters in `spec/spec_helper.rb`, but setting the environment variables `PGHOST`, `PGPORT`, `PGUSER`, and `PGPASSWORD` will override the defaults.
53
53
 
54
- To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
54
+ To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org). To ignore local changes (say to `.ruby-version`) you can do `rake build release:source_control_push release:rubygem_push`.
55
55
 
56
56
  ## License
57
57
 
@@ -29,6 +29,37 @@ module PgColumnBytePacker
29
29
  end
30
30
  end
31
31
 
32
+ if type_schema == "pg_catalog"
33
+ sql_type = case sql_type
34
+ when "char", "\"char\""
35
+ # The SQL standard defines bare 'character' as a single character
36
+ # type. The quoted variant is also single character, but raises
37
+ # an error on inputs longer than a single character (unlike the SQL standard,
38
+ # which requires it to be silently truncated).
39
+ #
40
+ # We don't yet use the length for the character type, but it seems
41
+ # worth retaining it here for completeness (rather than solely
42
+ # handling the quoted "char" case, which is what we have to have
43
+ # for things to work minimally).
44
+ #
45
+ # See: https://www.postgresql.org/docs/current/datatype-character.html
46
+ # and format_type.c
47
+ "character(1)"
48
+ when "bit"
49
+ # The SQL standard defines bare 'bit' as a single bit type.
50
+ #
51
+ # See: https://www.postgresql.org/docs/current/datatype-bit.html
52
+ "bit(1)"
53
+ when "\"bit\""
54
+ # The quoted variant, instead of being parallel to the quoted
55
+ # variant of char, allows any number of bits, but is technically
56
+ # a different entry in pg_type than varbit.
57
+ "bit"
58
+ else
59
+ sql_type
60
+ end
61
+ end
62
+
32
63
  bare_type = if type_schema
33
64
  if sql_type.start_with?("#{type_schema}.")
34
65
  sql_type.sub("#{type_schema}.", "")
@@ -58,9 +89,16 @@ module PgColumnBytePacker
58
89
  # doesn't match the canonical name in pg_type; e.g., "float8" is
59
90
  # canonical, but pgdump outputs "double precision".
60
91
  case bare_type
61
- when "bigint", "double precision", /\Atimestamp.*/, /\Abigserial( primary key)?/
92
+ when "bigint", "double precision", /\Atimestamp(tz| with time zone| without time zone)?/, /\Abigserial( primary key)?/
62
93
  8 # Actual alignment for these types.
63
- when "integer", "date", "decimal", "real", /\Aserial( primary key)?/
94
+ when "integer", "date", /\Atime(tz| with time zone| without time zone)?/, /\Adecimal(\([^\)]+\))?/, "real", /\Aserial( primary key)?/
95
+ # Note: unlike the others which always take a fixed amount of space,
96
+ # the numeric/decimal type is stored in a variable amount of space (see:
97
+ # https://www.postgresql.org/docs/10/datatype-numeric.html) but pg_type
98
+ # shows that its alignment is the same as integer. Postgres canonicalizes
99
+ # this type to numeric, but we have to still support the decimal
100
+ # designation for ActiveRecord inputs.
101
+
64
102
  4 # Actual alignment for these types.
65
103
  when "bytea"
66
104
  # These types generally have an alignment of 4, but values of at most 127 bytes
@@ -68,20 +106,46 @@ module PgColumnBytePacker
68
106
  # Since we'd expect any binary fields to be relatively long, we'll assume they
69
107
  # won't fit into the optimized case.
70
108
  4
71
- when "text", "citext", "character varying"
72
- # These types generally have an alignment of 4, but values of at most 127 bytes
73
- # long they are optimized into 2 byte alignment.
74
- # Since we don't have a good heuristic for determining which columns are likely
75
- # to be long or short, we currently just slot them all after the columns we
76
- # believe will always be long.
77
- # If desired we could also differentiate on length limits if set.
109
+ when "text", "citext", "character varying", "bit varying", "bit"
110
+ # These types generally have an alignment of 4 (as designated by pg_type
111
+ # having a typalign value of 'i'), but they're special in that small values
112
+ # have an optimized storage layout. Beyond the optimized storage layout, though,
113
+ # these small values also are not required to respect the alignment the type
114
+ # would otherwise have. Specifically, values with a size of at most 127 bytes
115
+ # aren't aligned. That 127 byte cap, however, includes an overhead byte to store
116
+ # the length, and so in reality the max is 126 bytes. Interestingly TOASTable
117
+ # values are also treated that way, but we don't have a good way of knowing which
118
+ # values those will be.
119
+ #
120
+ # See: `fill_val()` in src/backend/access/common/heaptuple.c (in the conditional
121
+ # `else if (att->attlen == -1)` branch).
122
+ #
123
+ # When no limit modifier has been applied we don't have a good heuristic for
124
+ # determining which columns are likely to be long or short, so we currently
125
+ # just slot them all after the columns we believe will always be long.
78
126
  3
79
127
  when /\Acharacter varying\(\d+\)/
128
+ # However, when a limit modifier has been applied we can make stronger assumptions.
80
129
  if (limit = /\Acharacter varying\((\d+)\)/.match(sql_type)[1])
81
- if limit.to_i <= 127
82
- 2
130
+ if limit.to_i <= 126
131
+ # If we know the limit guarantees we'll fit into the unaligned storage
132
+ # optimization, then we can go ahead and treat it as unaligned.
133
+ 1
83
134
  else
84
- 4
135
+ # If the limit would allow strings that require alignment, then we assume it's
136
+ # more likely to exceed the optimization cap and slot them after the columns
137
+ # we know for certain will require integer alignment.
138
+ 3
139
+ end
140
+ end
141
+ when /\Abit varying\(\d+\)/
142
+ # This type is functionally the same as varchar above, but the calculation we need
143
+ # to do has been scaled since the limit is expressed in bits rather than bytes.
144
+ if (limit = /\Abit varying\((\d+)\)/.match(sql_type)[1])
145
+ if limit.to_i <= (126 * 8)
146
+ 1
147
+ else
148
+ 3
85
149
  end
86
150
  end
87
151
  when /\Afloat(\(\d+\))?/
@@ -98,14 +162,30 @@ module PgColumnBytePacker
98
162
  else
99
163
  8 # Default is double precision
100
164
  end
101
- when "smallint", "boolean"
102
- 2 # Actual alignment for these types.
103
165
  else
166
+ type_without_modifier, modifier = bare_type.match(/\A([^\(]+)(\([^\)]+\))?/)[1..-1]
167
+
168
+ pg_type_typname = case type_without_modifier
169
+ when "boolean"
170
+ "bool"
171
+ when "smallint"
172
+ "int2"
173
+ when "character"
174
+ "char"
175
+ else
176
+ # There are other cases you might expect to see here (for other system
177
+ # types like varchar and varbit where the external name generated by
178
+ # format_type_extended() in Postgres's format_type.c doesn't match the
179
+ # pg_type typname), but we've already handled them separately above
180
+ # (since we have additional rules to apply to them).
181
+ type_without_modifier
182
+ end
183
+
104
184
  typtype, typalign = connection.select_rows(<<~SQL, "Type Lookup").first
105
185
  SELECT typ.typtype, typ.typalign
106
186
  FROM pg_type typ
107
187
  JOIN pg_namespace nsp ON nsp.oid = typ.typnamespace
108
- WHERE typname = '#{connection.quote_string(bare_type)}'
188
+ WHERE typname = '#{connection.quote_string(pg_type_typname)}'
109
189
  #{type_schema ? "AND nsp.nspname = '#{connection.quote_string(type_schema)}'" : ""}
110
190
  SQL
111
191
 
@@ -118,7 +198,10 @@ module PgColumnBytePacker
118
198
  else
119
199
  case typalign
120
200
  when "c"
121
- 0
201
+ # Character types, for example, occupy a variable amount of space
202
+ # (though fixed in the sense that it's specified up front for each
203
+ # column definition) but require no alignment.
204
+ 1
122
205
  when "s"
123
206
  2
124
207
  when "i"
@@ -126,7 +209,7 @@ module PgColumnBytePacker
126
209
  when "d"
127
210
  8
128
211
  else
129
- 0
212
+ 1
130
213
  end
131
214
  end
132
215
  end
@@ -1,3 +1,3 @@
1
1
  module PgColumnBytePacker
2
- VERSION = "1.0.0"
2
+ VERSION = "1.1.0"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pg_column_byte_packer
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.0
4
+ version: 1.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - jcoleman
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2020-09-29 00:00:00.000000000 Z
11
+ date: 2020-12-01 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rake
@@ -203,7 +203,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
203
203
  - !ruby/object:Gem::Version
204
204
  version: '0'
205
205
  requirements: []
206
- rubygems_version: 3.1.2
206
+ rubygems_version: 3.0.8
207
207
  signing_key:
208
208
  specification_version: 4
209
209
  summary: Auto-order table columns for optimize disk space usage