activerecord-pg-format-db-structure 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
data/README.md ADDED
@@ -0,0 +1,367 @@
1
+ # activerecord-pg-format-db-structure
2
+
3
+ Automatically cleans up your `structure.sql` file after each rails migration.
4
+
5
+ By default, it will:
6
+
7
+ * Inline primary key declarations
8
+ * Inline SERIAL type declarations
9
+ * Inline table constraints
10
+ * Move index creation below their corresponding tables
11
+ * Group `ALTER TABLE` statements into a single statement per table
12
+ * Remove unnecessary whitespace
13
+
14
+ The task will transform this raw `structure.sql`:
15
+
16
+ <details>
17
+
18
+ <summary>Click to expand</summary>
19
+
20
+ ```sql
21
+ --
22
+ -- Name: pgcrypto; Type: EXTENSION; Schema: -; Owner: -
23
+ --
24
+
25
+ CREATE EXTENSION IF NOT EXISTS pgcrypto WITH SCHEMA public;
26
+
27
+
28
+ --
29
+ -- Name: EXTENSION pgcrypto; Type: COMMENT; Schema: -; Owner: -
30
+ --
31
+
32
+ COMMENT ON EXTENSION pgcrypto IS 'cryptographic functions';
33
+
34
+ --
35
+ -- Name: comments; Type: TABLE; Schema: public; Owner: -
36
+ --
37
+
38
+ CREATE TABLE public.comments (
39
+ id bigint NOT NULL,
40
+ user_id bigint NOT NULL,
41
+ post_id bigint NOT NULL,
42
+ created_at timestamp(6) without time zone NOT NULL,
43
+ updated_at timestamp(6) without time zone NOT NULL
44
+ );
45
+
46
+
47
+ --
48
+ -- Name: comments_id_seq; Type: SEQUENCE; Schema: public; Owner: -
49
+ --
50
+
51
+ CREATE SEQUENCE public.comments_id_seq
52
+ START WITH 1
53
+ INCREMENT BY 1
54
+ NO MINVALUE
55
+ NO MAXVALUE
56
+ CACHE 1;
57
+
58
+
59
+ --
60
+ -- Name: comments_id_seq; Type: SEQUENCE OWNED BY; Schema: public; Owner: -
61
+ --
62
+
63
+ ALTER SEQUENCE public.comments_id_seq OWNED BY public.comments.id;
64
+
65
+ --
66
+ -- Name: posts; Type: TABLE; Schema: public; Owner: -
67
+ --
68
+
69
+ CREATE TABLE public.posts (
70
+ id bigint NOT NULL,
71
+ created_at timestamp(6) without time zone NOT NULL,
72
+ updated_at timestamp(6) without time zone NOT NULL
73
+ );
74
+
75
+
76
+ --
77
+ -- Name: posts_id_seq; Type: SEQUENCE; Schema: public; Owner: -
78
+ --
79
+
80
+ CREATE SEQUENCE public.posts_id_seq
81
+ START WITH 1
82
+ INCREMENT BY 1
83
+ NO MINVALUE
84
+ NO MAXVALUE
85
+ CACHE 1;
86
+
87
+
88
+ --
89
+ -- Name: posts_id_seq; Type: SEQUENCE OWNED BY; Schema: public; Owner: -
90
+ --
91
+
92
+ ALTER SEQUENCE public.posts_id_seq OWNED BY public.posts.id;
93
+
94
+
95
+ --
96
+ -- Name: users; Type: TABLE; Schema: public; Owner: -
97
+ --
98
+
99
+ CREATE TABLE public.users (
100
+ id bigint NOT NULL,
101
+ created_at timestamp(6) without time zone NOT NULL,
102
+ updated_at timestamp(6) without time zone NOT NULL
103
+ );
104
+
105
+
106
+ --
107
+ -- Name: users_id_seq; Type: SEQUENCE; Schema: public; Owner: -
108
+ --
109
+
110
+ CREATE SEQUENCE public.users_id_seq
111
+ START WITH 1
112
+ INCREMENT BY 1
113
+ NO MINVALUE
114
+ NO MAXVALUE
115
+ CACHE 1;
116
+
117
+
118
+ --
119
+ -- Name: users_id_seq; Type: SEQUENCE OWNED BY; Schema: public; Owner: -
120
+ --
121
+
122
+ ALTER SEQUENCE public.users_id_seq OWNED BY public.users.id;
123
+
124
+ --
125
+ -- Name: comments id; Type: DEFAULT; Schema: public; Owner: -
126
+ --
127
+
128
+ ALTER TABLE ONLY public.comments ALTER COLUMN id SET DEFAULT nextval('public.comments_id_seq'::regclass);
129
+
130
+ --
131
+ -- Name: posts id; Type: DEFAULT; Schema: public; Owner: -
132
+ --
133
+
134
+ ALTER TABLE ONLY public.posts ALTER COLUMN id SET DEFAULT nextval('public.posts_id_seq'::regclass);
135
+
136
+ --
137
+ -- Name: users id; Type: DEFAULT; Schema: public; Owner: -
138
+ --
139
+
140
+ ALTER TABLE ONLY public.users ALTER COLUMN id SET DEFAULT nextval('public.users_id_seq'::regclass);
141
+
142
+ --
143
+ -- Name: comments comments_pkey; Type: CONSTRAINT; Schema: public; Owner: -
144
+ --
145
+
146
+ ALTER TABLE ONLY public.comments
147
+ ADD CONSTRAINT comments_pkey PRIMARY KEY (id);
148
+
149
+ --
150
+ -- Name: posts posts_pkey; Type: CONSTRAINT; Schema: public; Owner: -
151
+ --
152
+
153
+ ALTER TABLE ONLY public.posts
154
+ ADD CONSTRAINT posts_pkey PRIMARY KEY (id);
155
+
156
+ --
157
+ -- Name: users users_pkey; Type: CONSTRAINT; Schema: public; Owner: -
158
+ --
159
+
160
+ ALTER TABLE ONLY public.users
161
+ ADD CONSTRAINT users_pkey PRIMARY KEY (id);
162
+
163
+ --
164
+ -- Name: comments fk_rails_0000000001; Type: FK CONSTRAINT; Schema: public; Owner: -
165
+ --
166
+
167
+ ALTER TABLE ONLY public.comments
168
+ ADD CONSTRAINT fk_rails_0000000001 FOREIGN KEY (post_id) REFERENCES public.posts(id);
169
+
170
+ --
171
+ -- Name: comments fk_rails_0000000002; Type: FK CONSTRAINT; Schema: public; Owner: -
172
+ --
173
+
174
+ ALTER TABLE ONLY public.comments
175
+ ADD CONSTRAINT fk_rails_0000000002 FOREIGN KEY (user_id) REFERENCES public.users(id);
176
+
177
+ INSERT INTO "schema_migrations" (version) VALUES
178
+ ('20250124155339');
179
+ ```
180
+ </details>
181
+
182
+ into this much more compact and normalized version:
183
+
184
+ ```sql
185
+ -- Name: pgcrypto; Type: EXTENSION
186
+
187
+ CREATE EXTENSION IF NOT EXISTS pgcrypto WITH SCHEMA public;
188
+
189
+
190
+ -- Name: comments; Type: TABLE;
191
+
192
+ CREATE TABLE public.comments (
193
+ id bigserial PRIMARY KEY,
194
+ user_id bigint NOT NULL,
195
+ post_id bigint NOT NULL,
196
+ created_at timestamp(6) NOT NULL,
197
+ updated_at timestamp(6) NOT NULL
198
+ );
199
+
200
+
201
+ -- Name: posts; Type: TABLE;
202
+
203
+ CREATE TABLE public.posts (
204
+ id bigserial PRIMARY KEY,
205
+ created_at timestamp(6) NOT NULL,
206
+ updated_at timestamp(6) NOT NULL
207
+ );
208
+
209
+
210
+ -- Name: users; Type: TABLE;
211
+
212
+ CREATE TABLE public.users (
213
+ id bigserial PRIMARY KEY,
214
+ created_at timestamp(6) NOT NULL,
215
+ updated_at timestamp(6) NOT NULL
216
+ );
217
+
218
+ ALTER TABLE ONLY public.comments
219
+ ADD CONSTRAINT fk_rails_0000000001 FOREIGN KEY (post_id) REFERENCES public.posts (id),
220
+ ADD CONSTRAINT fk_rails_0000000002 FOREIGN KEY (user_id) REFERENCES public.users (id);
221
+
222
+ INSERT INTO "schema_migrations" (version) VALUES
223
+ ('20250124155339');
224
+ ```
225
+
226
+ which is a lot more compact, easier to read, and reduces the risk of
227
+ getting random diffs between machines after each migration.
228
+
229
+ Those transformations are made by manipulating the SQL AST directly
230
+ using [pg_query](https://github.com/pganalyze/pg_query), and each
231
+ transformation is opt-in and can be run independently.
232
+
233
+ You can also add your own transforms (see below).
234
+
235
+
236
+ ## Installation
237
+
238
+ Add the following to your Gemfile:
239
+
240
+ ```ruby
241
+ gem 'activerecord-pg-format-db-structure'
242
+ ```
243
+
244
+ ## Usage
245
+
246
+ ### Rails
247
+
248
+ Adding the gem to your dependencies will automatically hook the library into your `rake db:migrate` task.
249
+
250
+ If you want to configure which transforms to use, you can configure the library with the following:
251
+
252
+ ```ruby
253
+ Rails.application.configure do
254
+ config.activerecord_pg_format_db_structure.preprocessors = [
255
+ ActiveRecordPgFormatDbStructure::Preprocessors::RemoveWhitespaces
256
+ ]
257
+
258
+ config.activerecord_pg_format_db_structure.transforms = [
259
+ ActiveRecordPgFormatDbStructure::Transforms::RemoveCommentsOnExtensions,
260
+ ActiveRecordPgFormatDbStructure::Transforms::InlinePrimaryKeys,
261
+ # ActiveRecordPgFormatDbStructure::Transforms::InlineForeignKeys,
262
+ ActiveRecordPgFormatDbStructure::Transforms::InlineSerials,
263
+ ActiveRecordPgFormatDbStructure::Transforms::InlineConstraints,
264
+ ActiveRecordPgFormatDbStructure::Transforms::MoveIndicesAfterCreateTable,
265
+ ActiveRecordPgFormatDbStructure::Transforms::GroupAlterTableStatements
266
+ ]
267
+
268
+ config.activerecord_pg_format_db_structure.deparser = ActiveRecordPgFormatDbStructure::Deparser
269
+ end
270
+ ```
271
+
272
+ ### Use outside of Rails
273
+
274
+ ```ruby
275
+ require "activerecord-pg-format-db-structure/formatter"
276
+
277
+ structure = File.read("db/structure.sql")
278
+ formatted = ActiveRecordPgFormatDbStructure::Formatter.new.format(structure)
279
+ File.write("db/structure.sql", formatted)
280
+ ```
281
+
282
+ ## Preprocessors
283
+
284
+ ### RemoveWhitespaces
285
+
286
+ Remove unnecessary comments, whitespace and empty lines.
287
+
288
+ ## Transformers
289
+
290
+ ### RemoveCommentsOnExtensions
291
+
292
+ Remove COMMENT statement applied to extensions
293
+
294
+ ### InlinePrimaryKeys
295
+
296
+ Inlines primary keys with the table declaration
297
+
298
+ ### InlineForeignKeys
299
+
300
+ Inline foreign key constraints.
301
+
302
+ Note: using this transform makes the structure file no longer
303
+ loadable, since tables should be created before a foreign key
304
+ can target them, so it is not included by default.
305
+
306
+ ### InlineSerials
307
+
308
+ Inline SERIAL declaration inside table declaration.
309
+
310
+ Note: the logic looks for statements of this shape:
311
+
312
+ ```sql
313
+ ALTER TABLE ONLY ts.tn ALTER COLUMN c SET DEFAULT nextval('ts.tn_c_seq'::regclass);
314
+ ```
315
+
316
+ It also assumes that the associated sequence has default settings. A
317
+ later version could try to be more strict / validate that the
318
+ sequence indeed has default settings.
319
+
320
+ ### InlineConstraints
321
+
322
+ Inline non-foreign key constraints into table declaration
323
+
324
+ ### MoveIndicesAfterCreateTable
325
+
326
+ Move index declarations just below the table they index
327
+
328
+ ### GroupAlterTableStatements
329
+
330
+ Group alter table statements into one operation per
331
+ table.
332
+
333
+ Should be run after other operations that inline alter statements.
334
+
335
+ ## Deparser
336
+
337
+ As of today, this is a bare implementation that works with the current combination of transformers.
338
+
339
+ As of now, it will only deparse `CREATE TABLE`, `CREATE INDEX` and
340
+ `ALTER TABLE` statements. Other statements will be kept unchanged from
341
+ the input SQL.
342
+
343
+ In order to support all statements, we will need to find a solution to more cleanly format SQL queries, as deparsing a `CREATE VIEW` statement will result in a single unreadable line if relying on `pg_query`.
344
+
345
+ ## Development
346
+
347
+ After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
348
+
349
+ To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and the created tag, and push the `.gem` file to [rubygems.org](https://rubygems.org).
350
+
351
+ ## Contributing
352
+
353
+ Bug reports and pull requests are welcome on GitHub at https://github.com/ReifyAB/activerecord-pg-format-db-structure. This project is intended to be a safe, welcoming space for collaboration, and contributors are expected to adhere to the [code of conduct](https://github.com/ReifyAB/activerecord-pg-format-db-structure/blob/main/CODE_OF_CONDUCT.md).
354
+
355
+ ## Credits
356
+
357
+ Using the awesome [pg_query](https://github.com/pganalyze/pg_query) that provides a ruby interface to the native Postgres SQL parser.
358
+
359
+ Inspired by the [activerecord-clean-db-structure](https://github.com/lfittl/activerecord-clean-db-structure) gem by [Lukas Fittl](https://github.com/lfittl). I wanted to achieve something like that, but using a proper SQL parser instead of search / replace using regexps.
360
+
361
+ ## License
362
+
363
+ The gem is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT).
364
+
365
+ ## Code of Conduct
366
+
367
+ Everyone interacting in the activerecord-pg-format-db-structure project's codebases, issue trackers, chat rooms and mailing lists is expected to follow the [code of conduct](https://github.com/ReifyAB/activerecord-pg-format-db-structure/blob/main/CODE_OF_CONDUCT.md).
data/Rakefile ADDED
@@ -0,0 +1,12 @@
1
+ # frozen_string_literal: true
2
+
3
require "bundler/gem_tasks"
require "rspec/core/rake_task"
require "rubocop/rake_task"

# Registers the `spec` task.
RSpec::Core::RakeTask.new(:spec)

# Registers the RuboCop task (referenced as `rubocop` by the default task below).
RuboCop::RakeTask.new

# Plain `rake` runs the specs first, then the linter.
task default: %i[spec rubocop]
@@ -0,0 +1,76 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "pg_query"
4
+
5
module ActiveRecordPgFormatDbStructure
  # Turns PgQuery::RawStmt nodes back into SQL strings.
  #
  # `CREATE TABLE`, `CREATE INDEX` and `ALTER TABLE` statements are
  # regenerated from their AST; every other statement is copied verbatim
  # from the SQL text the statements were parsed from.
  class Deparser
    attr_reader :source

    # @param source [String] the SQL text that was parsed into the raw
    #   statements later handed to #deparse_raw_statement
    def initialize(source)
      @source = source
    end

    # @param raw_statement [PgQuery::RawStmt]
    # @return [String] the SQL fragment for that statement
    def deparse_raw_statement(raw_statement)
      case raw_statement.to_h
      in stmt: { create_stmt: _ }
        deparse_create_stmt(raw_statement.stmt.create_stmt)
      in stmt: { index_stmt: _ }
        deparse_index_stmt(raw_statement.stmt.index_stmt)
      in stmt: { alter_table_stmt: _ }
        deparse_alter_table_stmt(raw_statement.stmt.alter_table_stmt)
      else
        keep_original_string(raw_statement)
      end
    end

    private

    # Copies the statement text straight out of the source.
    def keep_original_string(raw_statement)
      start = raw_statement.stmt_location || 0
      length = raw_statement.stmt_len.to_i

      # A length of 0 is how the parser marks a statement that runs to the
      # end of the input (e.g. a last statement without a trailing ";"),
      # so take everything that is left instead of a single character.
      return source[start..] if length.zero?

      # The inclusive upper bound is deliberate: it also picks up the
      # character right after the statement, i.e. its terminating ";".
      source[start..(start + length)]
    end

    def deparse_index_stmt(index_stmt)
      deparse_stmt(index_stmt)
    end

    # Puts every ADD / ALTER COLUMN sub-command on its own line.
    def deparse_alter_table_stmt(alter_table_stmt)
      one_liner = deparse_stmt(alter_table_stmt)
      multiline = one_liner
                  .gsub(" ADD ", "\n ADD ")
                  .gsub(" ALTER COLUMN ", "\n ALTER COLUMN ")
      "\n#{multiline}"
    end

    def deparse_stmt(stmt)
      "\n#{PgQuery.deparse_stmt(stmt)};"
    end

    # Emits a "-- Name: ...; Type: TABLE;" header followed by the table
    # definition with one table element per line.
    def deparse_create_stmt(create_stmt)
      table_str = "\n\n\n-- Name: #{create_stmt.relation.relname}; Type: TABLE;\n\n"
      # Deparse the statement without any element, then drop the empty
      # "()" so the elements can be written out line by line.
      table_str << PgQuery.deparse_stmt(
        PgQuery::CreateStmt.new(**create_stmt.to_h, table_elts: [])
      )
      table_str.gsub!(/\(\)\z/, "")
      table_str << "("
      table_str << create_stmt.table_elts.map { |elt| "\n #{deparse_table_elt(elt)}" }.join(",")
      table_str << "\n);"
      table_str
    end

    # Deparses a single table element by wrapping it in a throw-away
    # CREATE TABLE statement and stripping that wrapper again.
    def deparse_table_elt(elt)
      PgQuery.deparse_stmt(
        PgQuery::CreateStmt.new(
          relation: PgQuery::RangeVar.new(relname: "tmp"), table_elts: [elt]
        )
      ).sub(/\ACREATE TABLE ONLY tmp \(/, "").sub(/\)\z/, "")
    end
  end
end
@@ -0,0 +1,37 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "deparser"
4
+ require_relative "../activerecord-pg-format-db-structure"
5
+
6
module ActiveRecordPgFormatDbStructure
  # Formats & normalizes an SQL string by running it through the
  # configured pipeline: preprocessors (on the raw text), transforms (on
  # the parsed statements) and finally a deparser (back to text).
  class Formatter
    attr_reader :preprocessors, :transforms, :deparser

    # @param preprocessors [Array<Class>] classes built with the SQL string
    #   and responding to `preprocess!`
    # @param transforms [Array<Class>] classes built with the list of raw
    #   statements and responding to `transform!`
    # @param deparser [Class] class built with the SQL string and
    #   responding to `deparse_raw_statement`
    def initialize(
      preprocessors: DEFAULT_PREPROCESSORS,
      transforms: DEFAULT_TRANSFORMS,
      deparser: DEFAULT_DEPARSER
    )
      @preprocessors = preprocessors
      @transforms = transforms
      @deparser = deparser
    end

    # @param source [String] SQL to format; it is left untouched
    # @return [String] the formatted SQL
    def format(source)
      # Preprocessors edit their string in place, so hand them a private
      # copy: the caller's string stays intact and frozen input works.
      sql = source.dup

      preprocessors.each do |preprocessor|
        preprocessor.new(sql).preprocess!
      end

      raw_statements = PgQuery.parse(sql).tree.stmts

      transforms.each do |transform|
        transform.new(raw_statements).transform!
      end

      # One deparser serves all statements; they share the same SQL text.
      statement_deparser = deparser.new(sql)
      raw_statements.map do |raw_statement|
        statement_deparser.deparse_raw_statement(raw_statement)
      end.compact.join
    end
  end
end
@@ -0,0 +1,27 @@
1
+ # frozen_string_literal: true
2
+
3
module ActiveRecordPgFormatDbStructure
  module Preprocessors
    # Remove whitespace and SQL comments from an SQL string
    class RemoveWhitespaces
      attr_reader :source

      # @param source [String] SQL text; it is modified in place by
      #   #preprocess!
      def initialize(source)
        @source = source
      end

      # Cleans up `source` in place.
      #
      # @return [String] `source` itself, whether or not anything changed
      def preprocess!
        # Remove trailing whitespace on every line
        source.gsub!(/[ \t]+$/, "")
        # Drop one leading newline and collapse a doubled final newline;
        # both patterns are anchored, so a single substitution is enough.
        source.sub!(/\A\n/, "")
        source.sub!(/\n\n\z/, "\n")

        # Remove useless comment lines
        source.gsub!(/^--\n/, "")

        # Remove useless, version-specific parts of comments
        source.gsub!(/^-- (.*); Schema: ([\w.]+|-); Owner: -.*/, '-- \1')

        # The bang methods above return nil when nothing matched; return
        # the string itself so callers get a consistent value.
        source
      end
    end
  end
end
@@ -0,0 +1,15 @@
1
+ # frozen_string_literal: true
2
+
3
module ActiveRecordPgFormatDbStructure
  # Rails integration: exposes the formatter settings under
  # `config.activerecord_pg_format_db_structure` and loads the gem's rake
  # task file.
  class Railtie < Rails::Railtie
    # The preprocessor and transform lists are copied so that editing the
    # configuration does not touch the shared default constants.
    config.activerecord_pg_format_db_structure = ActiveSupport::OrderedOptions.new.tap do |options|
      options.preprocessors = DEFAULT_PREPROCESSORS.dup
      options.transforms = DEFAULT_TRANSFORMS.dup
      options.deparser = DEFAULT_DEPARSER
    end

    rake_tasks { load "activerecord-pg-format-db-structure/tasks/clean_db_structure.rake" }
  end
end
@@ -0,0 +1,17 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "../formatter"
4
+
5
# After `db:schema:dump` has run, reformat the SQL structure file of every
# database configured for the current environment, using the formatter
# settings from the Rails configuration.
Rake::Task["db:schema:dump"].enhance do
  formatter = ActiveRecordPgFormatDbStructure::Formatter.new(
    **Rails.application.config.activerecord_pg_format_db_structure
  )

  db_configs = ActiveRecord::Base.configurations.configs_for(
    env_name: ActiveRecord::Tasks::DatabaseTasks.env
  )

  db_configs.each do |db_config|
    filename = ActiveRecord::Tasks::DatabaseTasks.schema_dump_path(db_config, :sql)
    # NOTE(review): schema_dump_path appears to return nil when schema
    # dumping is disabled for a database; File.exist?(nil) would raise,
    # so skip such configurations as well as files that are not there.
    next unless filename && File.exist?(filename)

    File.write(filename, formatter.format(File.read(filename)))
  end
end
@@ -0,0 +1,80 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "pg_query"
4
+
5
module ActiveRecordPgFormatDbStructure
  module Transforms
    # Group alter table statements into one operation per
    # table. Should be run after other operations that inline alter statements.
    class GroupAlterTableStatements
      attr_reader :raw_statements

      # @param raw_statements [Array<PgQuery::RawStmt>] list of parsed
      #   statements; it is modified in place by #transform!
      def initialize(raw_statements)
        @raw_statements = raw_statements
      end

      # Removes every ALTER TABLE statement from the list, merges the ones
      # targeting the same table, and re-inserts one statement per table
      # right after the last table / materialized view / index statement.
      def transform!
        alter_groups = extract_alter_table_statements!

        return if alter_groups.empty?

        position = insert_position

        # Every group goes in at the same position, which is why
        # sort_groups hands them over in reverse order.
        sort_groups(alter_groups).each do |_, alters| # rubocop:disable Style/HashEachMethods
          raw_statements.insert(position, merge_alters(alters))
        end
      end

      private

      # Index at which the grouped statements are inserted: just after the
      # last table, materialized view or index statement. When the list has
      # none of those, fall back to appending at the end instead of failing.
      def insert_position
        last_anchor = raw_statements.each_with_index.filter_map do |statement, index|
          index if table_or_index?(statement)
        end.last

        last_anchor ? last_anchor + 1 : raw_statements.length
      end

      def table_or_index?(raw_statement)
        case raw_statement.stmt.to_h
        in { create_stmt: _ } | { create_table_as_stmt: _ } | { index_stmt: _ }
          true
        else
          false
        end
      end

      # Folds all statements of one table into the first one (after
      # sorting) by concatenating their command lists.
      def merge_alters(alters)
        sort_alters(alters).reduce do |merged, extra|
          merged.stmt.alter_table_stmt.cmds =
            merged.stmt.alter_table_stmt.cmds + extra.stmt.alter_table_stmt.cmds
          merged
        end
      end

      # Single-column foreign key constraints come first, ordered by column
      # name; every other statement follows.
      def sort_alters(alters)
        alters.sort_by do |alter|
          case alter.stmt.alter_table_stmt.to_h
          in cmds: [{
            alter_table_cmd: {
              subtype: :AT_AddConstraint,
              def: { constraint: {
                contype: :CONSTR_FOREIGN,
                fk_attrs: [{ string: { sval: fk_attr } }]
              } }
            }
          }]
            [1, fk_attr]
          else
            [2, ""]
          end
        end
      end

      # Groups ordered by schema then table name, reversed (see #transform!).
      def sort_groups(groups)
        groups.sort_by { |relation, _| [relation[:schemaname], relation[:relname]] }.reverse
      end

      # Deletes the ALTER TABLE statements from the list and returns them
      # grouped by target table.
      #
      # @return [Hash] `{ schemaname:, relname: }` => list of statements
      def extract_alter_table_statements!
        alter_groups = {}
        raw_statements.delete_if do |statement|
          next unless statement.stmt.to_h in {
            alter_table_stmt: {
              objtype: :OBJECT_TABLE,
              relation: { schemaname:, relname: }
            }
          }

          (alter_groups[{ schemaname:, relname: }] ||= []) << statement
          true
        end
        alter_groups
      end
    end
  end
end