activerecord-pg-format-db-structure 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README.md ADDED
@@ -0,0 +1,367 @@
1
+ # activerecord-pg-format-db-structure
2
+
3
+ Automatically cleans up your `structure.sql` file after each rails migration.
4
+
5
+ By default, it will:
6
+
7
+ * Inline primary key declarations
8
+ * Inline SERIAL type declarations
9
+ * Inline table constraints
10
+ * Move index creation below their corresponding tables
11
+ * Group `ALTER TABLE` statements into a single statement per table
12
+ * Remove unnecessary whitespace
13
+
14
+ The task will transform this raw `structure.sql`:
15
+
16
+ <details>
17
+
18
+ <summary>Click to expand</summary>
19
+
20
+ ```sql
21
+ --
22
+ -- Name: pgcrypto; Type: EXTENSION; Schema: -; Owner: -
23
+ --
24
+
25
+ CREATE EXTENSION IF NOT EXISTS pgcrypto WITH SCHEMA public;
26
+
27
+
28
+ --
29
+ -- Name: EXTENSION pgcrypto; Type: COMMENT; Schema: -; Owner: -
30
+ --
31
+
32
+ COMMENT ON EXTENSION pgcrypto IS 'cryptographic functions';
33
+
34
+ --
35
+ -- Name: comments; Type: TABLE; Schema: public; Owner: -
36
+ --
37
+
38
+ CREATE TABLE public.comments (
39
+ id bigint NOT NULL,
40
+ user_id bigint NOT NULL,
41
+ post_id bigint NOT NULL,
42
+ created_at timestamp(6) without time zone NOT NULL,
43
+ updated_at timestamp(6) without time zone NOT NULL
44
+ );
45
+
46
+
47
+ --
48
+ -- Name: comments_id_seq; Type: SEQUENCE; Schema: public; Owner: -
49
+ --
50
+
51
+ CREATE SEQUENCE public.comments_id_seq
52
+ START WITH 1
53
+ INCREMENT BY 1
54
+ NO MINVALUE
55
+ NO MAXVALUE
56
+ CACHE 1;
57
+
58
+
59
+ --
60
+ -- Name: comments_id_seq; Type: SEQUENCE OWNED BY; Schema: public; Owner: -
61
+ --
62
+
63
+ ALTER SEQUENCE public.comments_id_seq OWNED BY public.comments.id;
64
+
65
+ --
66
+ -- Name: posts; Type: TABLE; Schema: public; Owner: -
67
+ --
68
+
69
+ CREATE TABLE public.posts (
70
+ id bigint NOT NULL,
71
+ created_at timestamp(6) without time zone NOT NULL,
72
+ updated_at timestamp(6) without time zone NOT NULL
73
+ );
74
+
75
+
76
+ --
77
+ -- Name: posts_id_seq; Type: SEQUENCE; Schema: public; Owner: -
78
+ --
79
+
80
+ CREATE SEQUENCE public.posts_id_seq
81
+ START WITH 1
82
+ INCREMENT BY 1
83
+ NO MINVALUE
84
+ NO MAXVALUE
85
+ CACHE 1;
86
+
87
+
88
+ --
89
+ -- Name: posts_id_seq; Type: SEQUENCE OWNED BY; Schema: public; Owner: -
90
+ --
91
+
92
+ ALTER SEQUENCE public.posts_id_seq OWNED BY public.posts.id;
93
+
94
+
95
+ --
96
+ -- Name: users; Type: TABLE; Schema: public; Owner: -
97
+ --
98
+
99
+ CREATE TABLE public.users (
100
+ id bigint NOT NULL,
101
+ created_at timestamp(6) without time zone NOT NULL,
102
+ updated_at timestamp(6) without time zone NOT NULL
103
+ );
104
+
105
+
106
+ --
107
+ -- Name: users_id_seq; Type: SEQUENCE; Schema: public; Owner: -
108
+ --
109
+
110
+ CREATE SEQUENCE public.users_id_seq
111
+ START WITH 1
112
+ INCREMENT BY 1
113
+ NO MINVALUE
114
+ NO MAXVALUE
115
+ CACHE 1;
116
+
117
+
118
+ --
119
+ -- Name: users_id_seq; Type: SEQUENCE OWNED BY; Schema: public; Owner: -
120
+ --
121
+
122
+ ALTER SEQUENCE public.users_id_seq OWNED BY public.users.id;
123
+
124
+ --
125
+ -- Name: comments id; Type: DEFAULT; Schema: public; Owner: -
126
+ --
127
+
128
+ ALTER TABLE ONLY public.comments ALTER COLUMN id SET DEFAULT nextval('public.comments_id_seq'::regclass);
129
+
130
+ --
131
+ -- Name: posts id; Type: DEFAULT; Schema: public; Owner: -
132
+ --
133
+
134
+ ALTER TABLE ONLY public.posts ALTER COLUMN id SET DEFAULT nextval('public.posts_id_seq'::regclass);
135
+
136
+ --
137
+ -- Name: users id; Type: DEFAULT; Schema: public; Owner: -
138
+ --
139
+
140
+ ALTER TABLE ONLY public.users ALTER COLUMN id SET DEFAULT nextval('public.users_id_seq'::regclass);
141
+
142
+ --
143
+ -- Name: comments comments_pkey; Type: CONSTRAINT; Schema: public; Owner: -
144
+ --
145
+
146
+ ALTER TABLE ONLY public.comments
147
+ ADD CONSTRAINT comments_pkey PRIMARY KEY (id);
148
+
149
+ --
150
+ -- Name: posts posts_pkey; Type: CONSTRAINT; Schema: public; Owner: -
151
+ --
152
+
153
+ ALTER TABLE ONLY public.posts
154
+ ADD CONSTRAINT posts_pkey PRIMARY KEY (id);
155
+
156
+ --
157
+ -- Name: users users_pkey; Type: CONSTRAINT; Schema: public; Owner: -
158
+ --
159
+
160
+ ALTER TABLE ONLY public.users
161
+ ADD CONSTRAINT users_pkey PRIMARY KEY (id);
162
+
163
+ --
164
+ -- Name: comments fk_rails_0000000001; Type: FK CONSTRAINT; Schema: public; Owner: -
165
+ --
166
+
167
+ ALTER TABLE ONLY public.comments
168
+ ADD CONSTRAINT fk_rails_0000000001 FOREIGN KEY (post_id) REFERENCES public.posts(id);
169
+
170
+ --
171
+ -- Name: comments fk_rails_0000000002; Type: FK CONSTRAINT; Schema: public; Owner: -
172
+ --
173
+
174
+ ALTER TABLE ONLY public.comments
175
+ ADD CONSTRAINT fk_rails_0000000002 FOREIGN KEY (user_id) REFERENCES public.users(id);
176
+
177
+ INSERT INTO "schema_migrations" (version) VALUES
178
+ ('20250124155339');
179
+ ```
180
+ </details>
181
+
182
+ into this much more compact and normalized version:
183
+
184
+ ```sql
185
+ -- Name: pgcrypto; Type: EXTENSION
186
+
187
+ CREATE EXTENSION IF NOT EXISTS pgcrypto WITH SCHEMA public;
188
+
189
+
190
+ -- Name: comments; Type: TABLE;
191
+
192
+ CREATE TABLE public.comments (
193
+ id bigserial PRIMARY KEY,
194
+ user_id bigint NOT NULL,
195
+ post_id bigint NOT NULL,
196
+ created_at timestamp(6) NOT NULL,
197
+ updated_at timestamp(6) NOT NULL
198
+ );
199
+
200
+
201
+ -- Name: posts; Type: TABLE;
202
+
203
+ CREATE TABLE public.posts (
204
+ id bigserial PRIMARY KEY,
205
+ created_at timestamp(6) NOT NULL,
206
+ updated_at timestamp(6) NOT NULL
207
+ );
208
+
209
+
210
+ -- Name: users; Type: TABLE;
211
+
212
+ CREATE TABLE public.users (
213
+ id bigserial PRIMARY KEY,
214
+ created_at timestamp(6) NOT NULL,
215
+ updated_at timestamp(6) NOT NULL
216
+ );
217
+
218
+ ALTER TABLE ONLY public.comments
219
+ ADD CONSTRAINT fk_rails_0000000001 FOREIGN KEY (post_id) REFERENCES public.posts (id),
220
+ ADD CONSTRAINT fk_rails_0000000002 FOREIGN KEY (user_id) REFERENCES public.users (id);
221
+
222
+ INSERT INTO "schema_migrations" (version) VALUES
223
+ ('20250124155339');
224
+ ```
225
+
226
+ which is a lot more compact, easier to read, and reduces the risk of
227
+ getting random diffs between machines after each migration.
228
+
229
+ Those transformations are made by manipulating the SQL AST directly
230
+ using [pg_query](https://github.com/pganalyze/pg_query), and each
231
+ transformation is opt-in and can be run independently.
232
+
233
+ You can also add your own transforms (see below).
234
+
235
+
236
+ ## Installation
237
+
238
+ Add the following to your Gemfile:
239
+
240
+ ```ruby
241
+ gem 'activerecord-pg-format-db-structure'
242
+ ```
243
+
244
+ ## Usage
245
+
246
+ ### Rails
247
+
248
+ Adding the gem to your dependencies will automatically hook the library into your `rake db:migrate` task.
249
+
250
+ If you want to configure which transforms to use, you can configure the library with the following:
251
+
252
+ ```ruby
253
+ Rails.application.configure do
254
+ config.activerecord_pg_format_db_structure.preprocessors = [
255
+ ActiveRecordPgFormatDbStructure::Preprocessors::RemoveWhitespaces
256
+ ]
257
+
258
+ config.activerecord_pg_format_db_structure.transforms = [
259
+ ActiveRecordPgFormatDbStructure::Transforms::RemoveCommentsOnExtensions,
260
+ ActiveRecordPgFormatDbStructure::Transforms::InlinePrimaryKeys,
261
+ # ActiveRecordPgFormatDbStructure::Transforms::InlineForeignKeys,
262
+ ActiveRecordPgFormatDbStructure::Transforms::InlineSerials,
263
+ ActiveRecordPgFormatDbStructure::Transforms::InlineConstraints,
264
+ ActiveRecordPgFormatDbStructure::Transforms::MoveIndicesAfterCreateTable,
265
+ ActiveRecordPgFormatDbStructure::Transforms::GroupAlterTableStatements
266
+ ]
267
+
268
+ config.activerecord_pg_format_db_structure.deparser = ActiveRecordPgFormatDbStructure::Deparser
269
+ end
270
+ ```
271
+
272
+ ### Use outside of Rails
273
+
274
+ ```ruby
275
+ require "activerecord-pg-format-db-structure/formatter"
276
+
277
+ structure = File.read("db/structure.sql")
278
+ formatted = ActiveRecordPgFormatDbStructure::Formatter.new.format(structure)
279
+ File.write("db/structure.sql", formatted)
280
+ ```
281
+
282
+ ## Preprocessors
283
+
284
+ ### RemoveWhitespaces
285
+
286
+ Remove unnecessary comments, whitespace and empty lines.
287
+
288
+ ## Transformers
289
+
290
+ ### RemoveCommentsOnExtensions
291
+
292
+ Remove COMMENT statement applied to extensions
293
+
294
+ ### InlinePrimaryKeys
295
+
296
+ Inlines primary keys with the table declaration
297
+
298
+ ### InlineForeignKeys
299
+
300
+ Inline foreign key constraints.
301
+
302
+ Note: using this transform makes the structure file no longer
303
+ loadable, since a table must be created before a foreign key
304
+ can target it, so it is not included by default.
305
+
306
+ ### InlineSerials
307
+
308
+ Inline SERIAL declaration inside table declaration.
309
+
310
+ Note: the logic looks for statements of this shape:
311
+
312
+ ```sql
313
+ ALTER TABLE ONLY ts.tn ALTER COLUMN c SET DEFAULT nextval('ts.tn_c_seq'::regclass);
314
+ ```
315
+
316
+ It also assumes that the associated sequence has default settings. A
317
+ later version could try to be more strict / validate that the
318
+ sequence indeed has default settings.
319
+
320
+ ### InlineConstraints
321
+
322
+ Inline non-foreign key constraints into table declaration
323
+
324
+ ### MoveIndicesAfterCreateTable
325
+
326
+ Move index declarations just below the table they index
327
+
328
+ ### GroupAlterTableStatements
329
+
330
+ Group alter table statements into one operation per
331
+ table.
332
+
333
+ Should be run after other operations that inline alter statements.
334
+
335
+ ## Deparser
336
+
337
+ As of today, this is a bare implementation that works with the current combination of transformers.
338
+
339
+ As of now, it will only deparse `CREATE TABLE`, `CREATE INDEX` and
340
+ `ALTER TABLE` statements. Other statements will be kept unchanged from
341
+ the input SQL.
342
+
343
+ In order to support all statements, we will need to find a solution to more cleanly format SQL queries, as deparsing a `CREATE VIEW` statement will result in a single unreadable line if relying on `pg_query`.
344
+
345
+ ## Development
346
+
347
+ After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
348
+
349
+ To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and the created tag, and push the `.gem` file to [rubygems.org](https://rubygems.org).
350
+
351
+ ## Contributing
352
+
353
+ Bug reports and pull requests are welcome on GitHub at https://github.com/ReifyAB/activerecord-pg-format-db-structure. This project is intended to be a safe, welcoming space for collaboration, and contributors are expected to adhere to the [code of conduct](https://github.com/ReifyAB/activerecord-pg-format-db-structure/blob/main/CODE_OF_CONDUCT.md).
354
+
355
+ ## Credits
356
+
357
+ Using the awesome [pg_query](https://github.com/pganalyze/pg_query) that provides a ruby interface to the native Postgres SQL parser.
358
+
359
+ Inspired by the [activerecord-clean-db-structure](https://github.com/lfittl/activerecord-clean-db-structure) gem by [Lukas Fittl](https://github.com/lfittl). I wanted to achieve something like that, but using a proper SQL parser instead of search / replace using regexps.
360
+
361
+ ## License
362
+
363
+ The gem is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT).
364
+
365
+ ## Code of Conduct
366
+
367
+ Everyone interacting in the activerecord-pg-format-db-structure project's codebases, issue trackers, chat rooms and mailing lists is expected to follow the [code of conduct](https://github.com/ReifyAB/activerecord-pg-format-db-structure/blob/main/CODE_OF_CONDUCT.md).
data/Rakefile ADDED
@@ -0,0 +1,12 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "bundler/gem_tasks"
4
+ require "rspec/core/rake_task"
5
+
6
+ RSpec::Core::RakeTask.new(:spec)
7
+
8
+ require "rubocop/rake_task"
9
+
10
+ RuboCop::RakeTask.new
11
+
12
+ task default: %i[spec rubocop]
@@ -0,0 +1,76 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "pg_query"
4
+
5
module ActiveRecordPgFormatDbStructure
  # Turns a single PgQuery::RawStmt back into an SQL string.
  #
  # CREATE TABLE, CREATE INDEX and ALTER TABLE statements are re-generated
  # from their AST; every other statement is copied verbatim from the SQL
  # text the statements were parsed from.
  class Deparser
    # @return [String] the SQL text the raw statements were parsed from
    attr_reader :source

    # @param source [String] the SQL text the raw statements were parsed from
    def initialize(source)
      @source = source
    end

    # @param raw_statement [PgQuery::RawStmt] statement to render
    # @return [String, nil] the SQL for that statement, including the leading
    #   newline(s) that separate it from the previous statement
    def deparse_raw_statement(raw_statement)
      case raw_statement.to_h
      in stmt: { create_stmt: _ }
        deparse_create_stmt(raw_statement.stmt.create_stmt)
      in stmt: { index_stmt: _ }
        deparse_index_stmt(raw_statement.stmt.index_stmt)
      in stmt: { alter_table_stmt: _ }
        deparse_alter_table_stmt(raw_statement.stmt.alter_table_stmt)
      else
        keep_original_string(raw_statement)
      end
    end

    private

    # Copies the statement text straight out of +source+.
    #
    # The parser reports +stmt_location+ and +stmt_len+ as *byte* offsets, so
    # the slice is taken with +byteslice+; character indexing would drift as
    # soon as the SQL contains multibyte characters. One extra byte is taken
    # past +stmt_len+ to keep the terminating semicolon. A length of 0 means
    # "until the end of the string" and is honoured as such.
    def keep_original_string(raw_statement)
      start = raw_statement.stmt_location || 0
      length = raw_statement.stmt_len.to_i
      return source.byteslice(start, source.bytesize - start) if length.zero?

      source.byteslice(start, length + 1)
    end

    # CREATE INDEX statements are rendered on a single line.
    def deparse_index_stmt(index_stmt)
      deparse_stmt(index_stmt)
    end

    # Renders ALTER TABLE with each ADD / ALTER COLUMN command on its own line,
    # preceded by an extra blank line.
    def deparse_alter_table_stmt(alter_table_stmt)
      one_liner = deparse_stmt(alter_table_stmt)
      multi_line = one_liner
                   .gsub(" ADD ", "\n ADD ")
                   .gsub(" ALTER COLUMN ", "\n ALTER COLUMN ")
      "\n#{multi_line}"
    end

    # Generic rendering: newline, deparsed statement, terminating semicolon.
    def deparse_stmt(stmt)
      "\n#{PgQuery.deparse_stmt(stmt)};"
    end

    # Renders CREATE TABLE with a "-- Name: ...; Type: TABLE;" header comment
    # and one table element (column or constraint) per line.
    def deparse_create_stmt(create_stmt)
      table_str = "\n\n\n-- Name: #{create_stmt.relation.relname}; Type: TABLE;\n\n"
      # Deparse the statement without its elements to obtain the
      # "CREATE TABLE name ()" prefix, then drop the empty parentheses so the
      # elements can be laid out by hand.
      table_str << PgQuery.deparse_stmt(
        PgQuery::CreateStmt.new(
          **create_stmt.to_h,
          table_elts: []
        )
      )
      table_str.gsub!(/\(\)\z/, "")
      table_str << "("
      table_str << create_stmt.table_elts.map { |elt| "\n #{deparse_table_elt(elt)}" }.join(",")
      table_str << "\n);"
      table_str
    end

    # Renders a single table element by wrapping it in a throw-away
    # CREATE TABLE statement and stripping that wrapper from the output.
    def deparse_table_elt(elt)
      PgQuery.deparse_stmt(
        PgQuery::CreateStmt.new(
          relation: PgQuery::RangeVar.new(relname: "tmp"), table_elts: [elt]
        )
      ).sub(/\ACREATE TABLE ONLY tmp \(/, "").sub(/\)\z/, "")
    end
  end
end
@@ -0,0 +1,37 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "deparser"
4
+ require_relative "../activerecord-pg-format-db-structure"
5
+
6
module ActiveRecordPgFormatDbStructure
  # Formats & normalizes the given SQL string.
  #
  # The pipeline is: preprocessors (textual clean-up of the SQL string),
  # parsing with PgQuery, transforms (in-place rewrites of the statement
  # list), then deparsing of every remaining statement.
  class Formatter
    attr_reader :preprocessors, :transforms, :deparser

    # @param preprocessors [Array<Class>] classes built with the SQL string and
    #   responding to +preprocess!+
    # @param transforms [Array<Class>] classes built with the statement list and
    #   responding to +transform!+
    # @param deparser [Class] class built with the SQL string and responding to
    #   +deparse_raw_statement+
    def initialize(
      preprocessors: DEFAULT_PREPROCESSORS,
      transforms: DEFAULT_TRANSFORMS,
      deparser: DEFAULT_DEPARSER
    )
      @preprocessors = preprocessors
      @transforms = transforms
      @deparser = deparser
    end

    # @param source [String] raw SQL. A mutable string is preprocessed in place
    #   (as before); a frozen string is copied first instead of raising
    #   FrozenError.
    # @return [String] the formatted SQL
    def format(source)
      # Unary plus returns the receiver when it is mutable and a duplicate
      # when it is frozen, so preprocessors can always mutate it.
      source = +source

      preprocessors.each { |preprocessor| preprocessor.new(source).preprocess! }

      raw_statements = PgQuery.parse(source).tree.stmts

      transforms.each { |transform| transform.new(raw_statements).transform! }

      # The deparser only depends on the source text, so one instance serves
      # every statement.
      statement_deparser = deparser.new(source)
      raw_statements.map do |raw_statement|
        statement_deparser.deparse_raw_statement(raw_statement)
      end.compact.join
    end
  end
end
@@ -0,0 +1,27 @@
1
+ # frozen_string_literal: true
2
+
3
module ActiveRecordPgFormatDbStructure
  module Preprocessors
    # Strips superfluous whitespace and comment noise from an SQL string.
    # The string handed to the constructor is modified in place.
    class RemoveWhitespaces
      attr_reader :source

      def initialize(source)
        @source = source
      end

      # Runs every clean-up step, in order, directly on +source+.
      def preprocess!
        strip_whitespace!
        strip_comment_noise!
      end

      private

      # Drops trailing spaces/tabs on each line, one leading newline and one
      # of two trailing newlines.
      def strip_whitespace!
        source.gsub!(/[ \t]+$/, "")
        source.gsub!(/\A\n/, "")
        source.gsub!(/\n\n\z/, "\n")
      end

      # Drops comment lines that contain nothing but "--", then trims the
      # "; Schema: ...; Owner: -" tail from the remaining comment lines.
      def strip_comment_noise!
        source.gsub!(/^--\n/, "")
        source.gsub!(/^-- (.*); Schema: ([\w.]+|-); Owner: -.*/, '-- \1')
      end
    end
  end
end
@@ -0,0 +1,15 @@
1
+ # frozen_string_literal: true
2
+
3
module ActiveRecordPgFormatDbStructure
  # Rails integration: registers the gem's configuration namespace, seeded
  # with copies of the default preprocessors and transforms and with the
  # default deparser, and loads the rake task file.
  class Railtie < Rails::Railtie
    config.activerecord_pg_format_db_structure = ActiveSupport::OrderedOptions.new.tap do |options|
      # Duplicated so an application can edit its lists without touching the defaults.
      options.preprocessors = DEFAULT_PREPROCESSORS.dup
      options.transforms = DEFAULT_TRANSFORMS.dup
      options.deparser = DEFAULT_DEPARSER
    end

    rake_tasks { load "activerecord-pg-format-db-structure/tasks/clean_db_structure.rake" }
  end
end
@@ -0,0 +1,17 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "../formatter"
4
+
5
# After Rails has dumped the schema, reformat the SQL structure file of every
# database configured for the current environment, using the formatter
# settings from the application configuration.
Rake::Task["db:schema:dump"].enhance do
  formatter = ActiveRecordPgFormatDbStructure::Formatter.new(
    **Rails.application.config.activerecord_pg_format_db_structure
  )

  db_configs = ActiveRecord::Base.configurations.configs_for(
    env_name: ActiveRecord::Tasks::DatabaseTasks.env
  )

  db_configs.each do |db_config|
    filename = ActiveRecord::Tasks::DatabaseTasks.schema_dump_path(db_config, :sql)
    # schema_dump_path may return nil (e.g. when schema dumping is disabled
    # for a database); File.exist?(nil) would raise TypeError, so skip those.
    next unless filename && File.exist?(filename)

    File.write(filename, formatter.format(File.read(filename)))
  end
end
@@ -0,0 +1,80 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "pg_query"
4
+
5
module ActiveRecordPgFormatDbStructure
  module Transforms
    # Group alter table statements into one operation per
    # table. Should be run after other operations that inline alter statements.
    class GroupAlterTableStatements
      attr_reader :raw_statements

      # @param raw_statements [Array-like] statement list, modified in place
      def initialize(raw_statements)
        @raw_statements = raw_statements
      end

      # Removes every ALTER TABLE statement from the list, merges the commands
      # of the statements that target the same table, and re-inserts one
      # statement per table after the last table / materialized view / index.
      # When the list contains none of those, the merged statements are
      # appended at the end instead of failing on a missing insert position.
      def transform!
        alter_groups = extract_alter_table_statements!

        return if alter_groups.empty?

        insert_index = find_insert_index

        # Groups come back in reverse order and are all inserted at the same
        # position, which leaves them in ascending order in the final list.
        sort_groups(alter_groups).each do |_, alters| # rubocop:disable Style/HashEachMethods
          merged = sort_alters(alters).reduce do |target, other|
            target.stmt.alter_table_stmt.cmds = target.stmt.alter_table_stmt.cmds + other.stmt.alter_table_stmt.cmds
            target
          end
          raw_statements.insert(insert_index, merged)
        end
      end

      private

      # Position right after the last CREATE TABLE, CREATE TABLE AS or
      # CREATE INDEX statement, or the end of the list when there is none.
      def find_insert_index
        last_anchor = raw_statements.each_with_index.filter_map do |statement, index|
          # after all tables, materialized views and indices
          index if statement.stmt.to_h in { create_stmt: _ } | { create_table_as_stmt: _ } | { index_stmt: _ }
        end.last

        last_anchor ? last_anchor + 1 : raw_statements.size
      end

      # Single-command statements adding a one-column foreign key come first,
      # ordered by column name; everything else follows.
      def sort_alters(alters)
        alters.sort_by do |alter|
          case alter.stmt.alter_table_stmt.to_h
          in cmds: [{
            alter_table_cmd: {
              subtype: :AT_AddConstraint,
              def: { constraint: {
                contype: :CONSTR_FOREIGN,
                fk_attrs: [{ string: { sval: fk_attr } }]
              } }
            }
          }]
            [1, fk_attr]
          else
            [2, ""]
          end
        end
      end

      # Orders groups by schema then table name, reversed (see #transform!).
      def sort_groups(groups)
        groups.sort_by { |relation, _| [relation[:schemaname], relation[:relname]] }.reverse
      end

      # Deletes the ALTER TABLE statements from the list and returns them
      # grouped by { schemaname:, relname: }.
      def extract_alter_table_statements!
        alter_groups = {}
        raw_statements.delete_if do |s|
          next unless s.stmt.to_h in alter_table_stmt: {
            objtype: :OBJECT_TABLE,
            relation: {
              schemaname:,
              relname:
            }
          }

          relation = { schemaname:, relname: }
          alter_groups[relation] ||= []
          alter_groups[relation] << s
          true
        end
        alter_groups
      end
    end
  end
end