@truefoundry/tfy-infra-engine 0.1.0 → 0.1.2-canary.e8cd23d

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,278 @@
1
+ #!/bin/sh
2
+ # canonical_hash.sh -- Format-independent canonical hashing for HCL files.
3
+ #
4
+ # Produces a deterministic SHA-256 hash of HCL content that is independent of
5
+ # formatting differences (indentation, alignment, comments, trailing commas).
6
+ # Two semantically identical HCL files with different formatting will produce
7
+ # the same hash.
8
+ #
9
+ # USAGE:
10
+ # canonical_hash.sh <file> Hash a file by path
11
+ # canonical_hash.sh Hash from stdin (or explicit "-")
12
+ # canonical_hash.sh --test Run self-test to verify the pipeline
13
+ #
14
+ # OUTPUT:
15
+ # sha256:<64-hex-chars>
16
+ #
17
+ # EXIT CODES:
18
+ # 0 Success
19
+ # 1 No SHA-256 tool found
20
+ # 2 Input error (file not found, etc.)
21
+ #
22
+ # PIPELINE:
23
+ # 1. Strip @tfy-status header (sed)
24
+ # 2. Strip carriage returns (tr -d '\r')
25
+ # 3. Canonicalize (AWK 4-state machine)
26
+ # 4. SHA-256 hash
27
+ #
28
+ # ---------------------------------------------------------------------------
29
+ # CANONICALIZATION ALGORITHM
30
+ #
31
+ # A 4-state character scanner implemented in POSIX AWK. It processes input
32
+ # character by character, stripping comments and whitespace outside strings,
33
+ # normalizing trailing commas, and preserving string content exactly.
34
+ #
35
+ # States:
36
+ # NORMAL (0) -- Outside strings and comments
37
+ # STRING (1) -- Inside a double-quoted string
38
+ # LINE_COMMENT (2) -- Inside a # or // comment (until end of line)
39
+ # BLOCK_COMMENT (3) -- Inside a /* ... */ comment
40
+ #
41
+ # State transitions:
42
+ #
43
+ # State | Whitespace | " | # | // | /* | */ | , | Other
44
+ # ------------- | ---------- | -------------- | ------------- | ------------- | -------------- | --------- | ---------------------- | ---------
45
+ # NORMAL | discard | emit, ->STRING | ->LINE_COMMENT| ->LINE_COMMENT| ->BLOCK_COMMENT| n/a | buffer (trailing logic) | emit
46
+ # STRING | emit | emit, ->NORMAL | emit | emit | emit | emit | emit | emit
47
+ # | | (if unescaped) | | | | | |
48
+ # LINE_COMMENT | discard | discard | discard | discard | discard | discard | discard | discard
49
+ # | | | | | | | | (newline->NORMAL)
50
+ # BLOCK_COMMENT | discard | discard | discard | discard | discard | ->NORMAL | discard | discard
51
+ #
52
+ # Trailing comma normalization:
53
+ # When "," is seen in NORMAL state, it is buffered (not emitted immediately).
54
+ # When the next non-whitespace character arrives:
55
+ # - If it is "}" or "]", discard the buffered comma (trailing comma removed)
56
+ # - Otherwise, emit the buffered comma then the character
57
+ #
58
+ # Escape handling in STRING state:
59
+ # A backslash (\) sets an escape flag. \" does NOT exit STRING state.
60
+ # \\ (escaped backslash) clears the escape flag, so a following " DOES exit.
61
+ #
62
+ # EXAMPLE:
63
+ # Input A (formatted):
64
+ # resource "aws_instance" "main" {
65
+ # ami = "ami-12345" # AMI ID
66
+ # instance_type = "t2.micro"
67
+ # tags = { Name = "prod", }
68
+ # }
69
+ #
70
+ # Input B (compact):
71
+ # resource "aws_instance" "main" {
72
+ # ami = "ami-12345"
73
+ # instance_type = "t2.micro"
74
+ # tags = {Name = "prod"}
75
+ # }
76
+ #
77
+ # Both produce identical canonical form:
78
+ # resource"aws_instance""main"{ami="ami-12345"instance_type="t2.micro"tags={Name="prod"}}
79
+ #
80
+ # KNOWN TRADE-OFFS (consistently wrong, but deterministic):
81
+ # 1. Template expressions with nested quotes: "${lookup(map, "default")}"
82
+ # The inner " toggles STRING state. Whitespace inside "default" may be
83
+ # stripped. Both formatted and unformatted versions produce the same result.
84
+ # 2. Heredocs: No HEREDOC state. Whitespace inside heredocs is stripped as
85
+ # normal code. Both versions produce the same result since formatters
86
+ # never change heredoc content.
87
+ # 3. Escaped backslash before quote: "C:\\dir\\" -- not actually lossy: each
88
+ # \\ pair clears the escape flag, so the closing " ends the string.
89
+ # 4. Block comment */ split across lines: * at end of line 1 and / at start
90
+ # of line 2 won't be detected as */. Extremely unlikely in practice.
91
+ # ---------------------------------------------------------------------------
92
+
93
# Abort on the first command that fails outside a condition, so a broken
# pipeline stage never yields a partial result silently.
set -o errexit
94
+
95
# Select the SHA-256 command line to use and store it in the global SHA_CMD.
#
# Candidates are probed in order of preference: sha256sum, shasum, openssl.
# SHA_CMD is later expanded unquoted, so it may hold a command plus options.
# Exits with status 1 (message on stderr) when no candidate is on PATH.
detect_sha_cmd() {
    SHA_CMD=""
    for candidate in sha256sum shasum openssl; do
        command -v "$candidate" >/dev/null 2>&1 || continue
        case "$candidate" in
            sha256sum) SHA_CMD="sha256sum" ;;
            shasum)    SHA_CMD="shasum -a 256" ;;
            openssl)   SHA_CMD="openssl dgst -sha256 -r" ;;
        esac
        break
    done

    if [ -z "$SHA_CMD" ]; then
        printf "ERROR: no SHA-256 tool found\n" >&2
        exit 1
    fi
}
108
+
109
# Remove the @tfy-status header block from stdin and write the rest to stdout.
#
# The block runs from a line that is exactly "# @tfy-status:begin" through the
# next line that is exactly "# @tfy-status:end", inclusive. If the begin
# sentinel has no matching end sentinel, sed deletes through end of input.
#
# This filter runs before carriage returns are stripped from the stream, so
# each sentinel is allowed one optional trailing CR. Without that, the header
# of a CRLF-encoded file would never match and would leak into the hash.
strip_header() {
    # Command substitution only trims trailing newlines, so the CR survives.
    _tfy_cr=$(printf '\r')
    # BRE fragment: at most one CR, then end of line.
    _tfy_eol="${_tfy_cr}"'\{0,1\}$'
    sed "/^# @tfy-status:begin${_tfy_eol}/,/^# @tfy-status:end${_tfy_eol}/d"
}
113
+
114
# Reduce HCL text on stdin to its canonical form on stdout.
#
# A character scanner written in POSIX AWK with four modes (NORMAL, STRING,
# LINE_COMMENT, BLOCK_COMMENT). Outside strings it drops spaces, tabs, line
# breaks and comments; inside double-quoted strings every character is kept.
# A "," seen in NORMAL mode is held back and dropped when the next emitted
# character is "}" or "]", which removes trailing commas. The result is
# written without a trailing newline.
canonicalize() {
    awk '
        BEGIN {
            NORMAL = 0; STRING = 1; LINE_COMMENT = 2; BLOCK_COMMENT = 3
            mode = NORMAL
            escaped = 0       # previous string char was an unescaped backslash
            comma_held = 0    # a "," is waiting for the next significant char
            result = ""
        }

        {
            len = length($0)
            pos = 1
            while (pos <= len) {
                ch = substr($0, pos, 1)
                pair = substr($0, pos, 2)    # ch plus one char of lookahead
                pos++

                if (mode == STRING) {
                    # Strings are copied verbatim, delimiters included.
                    result = result ch
                    if (ch == "\"" && !escaped) mode = NORMAL
                    escaped = (ch == "\\" && !escaped)
                    continue
                }

                if (mode == BLOCK_COMMENT) {
                    # Only a terminator on a single line closes the comment.
                    if (pair == "*/") {
                        mode = NORMAL
                        pos++
                    }
                    continue
                }

                # NORMAL mode from here on.
                if (ch == " " || ch == "\t") continue

                # Comments are invisible: they leave a held comma untouched
                # so that it is judged against the next real character.
                if (ch == "#" || pair == "//") {
                    mode = LINE_COMMENT
                    break                    # rest of the record is comment
                }
                if (pair == "/*") {
                    mode = BLOCK_COMMENT
                    pos++
                    continue
                }

                # A significant character decides the fate of a held comma.
                if (comma_held) {
                    if (ch != "}" && ch != "]") result = result ","
                    comma_held = 0
                }

                if (ch == ",") {
                    comma_held = 1
                    continue
                }

                result = result ch
                if (ch == "\"") {
                    mode = STRING
                    escaped = 0
                }
            }

            # A line comment never extends past the end of its record.
            if (mode == LINE_COMMENT) mode = NORMAL
        }

        END {
            if (comma_held) result = result ","
            printf "%s", result
        }
    '
}
201
+
202
# Hash stdin with the command stored in SHA_CMD and print "sha256:<hex>".
#
# The digest is the first whitespace-separated field of the tool's output,
# which is how sha256sum, shasum and "openssl dgst -r" all report it.
#
# Exits with status 1 (message on stderr) when the hash command fails or
# produces no digest. Its output is captured before the field is extracted,
# because piping it straight into awk would hide its exit status and let a
# bare "sha256:" be printed as if hashing had succeeded.
compute_hash() {
    if ! raw_digest=$($SHA_CMD); then
        printf "ERROR: SHA-256 command failed: %s\n" "$SHA_CMD" >&2
        exit 1
    fi

    hex=$(printf '%s\n' "$raw_digest" | awk '{ print $1 }')
    if [ -z "$hex" ]; then
        printf "ERROR: SHA-256 command produced no digest: %s\n" "$SHA_CMD" >&2
        exit 1
    fi

    printf "sha256:%s\n" "$hex"
}
207
+
208
# Hash the HCL text arriving on stdin and print "sha256:<hex>".
# Stages, in order: drop the @tfy-status header, drop carriage returns,
# canonicalize, hash.
hash_stdin() {
    strip_header \
        | tr -d '\r' \
        | canonicalize \
        | compute_hash
}
212
+
213
# Hash the HCL file named by $1 and print "sha256:<hex>".
#
# $1 -- path to a regular file.
#
# Exits with status 2 (the documented input-error code, message on stderr)
# when the path is not a regular file or cannot be read. Readability is
# checked explicitly because a failed "<" redirection would otherwise end the
# script with a shell-dependent status instead of 2.
hash_file() {
    if [ ! -f "$1" ]; then
        printf "ERROR: file not found: %s\n" "$1" >&2
        exit 2
    fi
    if [ ! -r "$1" ]; then
        printf "ERROR: file not readable: %s\n" "$1" >&2
        exit 2
    fi
    hash_stdin < "$1"
}
221
+
222
# Self-test mode: run the full pipeline on small fixed inputs and check the
# properties the script promises. Prints "OK: all self-tests passed" and exits
# 0 on success; on the first failure prints a "FAIL: ..." line to stderr and
# exits 1.
run_self_test() {
    detect_sha_cmd

    # Test 1: output format -- "sha256:" followed by exactly 64 lowercase hex
    # characters. A glob such as sha256:[a-f0-9]* would only check the first
    # character, so the digest part is validated separately.
    result=$(printf 'a = 1\n' | hash_stdin)
    format_ok=0
    case "$result" in
        sha256:*)
            digest_hex=${result#sha256:}
            case "$digest_hex" in
                ""|*[!a-f0-9]*)
                    ;;
                *)
                    if [ "${#digest_hex}" -eq 64 ]; then
                        format_ok=1
                    fi
                    ;;
            esac
            ;;
    esac
    if [ "$format_ok" -ne 1 ]; then
        printf "FAIL: output format: %s\n" "$result" >&2
        exit 1
    fi

    # Test 2: format independence (whitespace, comments, trailing comma)
    formatted=$(printf 'resource "null" "a" {\n ami = "x" # comment\n tags = { Name = "prod", }\n}\n' | hash_stdin)
    compact=$(printf 'resource "null" "a" {\nami="x"\ntags={Name="prod"}\n}\n' | hash_stdin)
    if [ "$formatted" != "$compact" ]; then
        printf "FAIL: format independence\n formatted: %s\n compact: %s\n" "$formatted" "$compact" >&2
        exit 1
    fi

    # Test 3: header stripping
    with_header=$(printf '# @tfy-status:begin\n# {"managed":true}\n# @tfy-status:end\na = 1\n' | hash_stdin)
    without_header=$(printf 'a = 1\n' | hash_stdin)
    if [ "$with_header" != "$without_header" ]; then
        printf "FAIL: header stripping\n with: %s\n without: %s\n" "$with_header" "$without_header" >&2
        exit 1
    fi

    # Test 4: string preservation (comment chars inside strings)
    hash_a=$(printf 'a = "# not a comment"\n' | hash_stdin)
    hash_b=$(printf 'a = "# different comment"\n' | hash_stdin)
    if [ "$hash_a" = "$hash_b" ]; then
        printf "FAIL: string preservation -- different strings produced same hash\n" >&2
        exit 1
    fi

    # Test 5: line-ending independence (CRLF body vs LF body)
    hash_lf=$(printf 'a = 1\nb = 2\n' | hash_stdin)
    hash_crlf=$(printf 'a = 1\r\nb = 2\r\n' | hash_stdin)
    if [ "$hash_lf" != "$hash_crlf" ]; then
        printf "FAIL: line-ending independence\n lf: %s\n crlf: %s\n" "$hash_lf" "$hash_crlf" >&2
        exit 1
    fi

    printf "OK: all self-tests passed\n"
    exit 0
}
264
+
265
# Main entry point: pick the hash tool, then dispatch on the first argument.
#   --test        run the self-test
#   (none) or -   hash stdin
#   anything else treat the argument as a file path
detect_sha_cmd

target="${1:-}"
if [ "$target" = "--test" ]; then
    run_self_test
elif [ -z "$target" ] || [ "$target" = "-" ]; then
    hash_stdin
else
    hash_file "$target"
fi