asmdiff 0.1.0__tar.gz → 0.1.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,7 @@
1
+ {
2
+ "permissions": {
3
+ "allow": [
4
+ "Bash(python3 test_asmdiff.py)"
5
+ ]
6
+ }
7
+ }
@@ -1,13 +1,13 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: asmdiff
3
- Version: 0.1.0
3
+ Version: 0.1.1
4
4
  Summary: Compare per-function assembly between paired C implementations
5
5
  Project-URL: Homepage, https://github.com/rt-rtos/asmdiff
6
6
  Project-URL: Repository, https://github.com/rt-rtos/asmdiff
7
7
  Author: Rasmus Tikkanen
8
8
  License-Expression: MIT
9
9
  License-File: LICENSE
10
- Keywords: assembly,clang,codegen,compiler,disassembly,gcc
10
+ Keywords: arm,assembly,cfg,clang,cli,codegen,compiler,devtools,diff,disassembly,dsp,embedded,firmware,gcc,loop-analysis,riscv,static-analysis,xtensa
11
11
  Classifier: Environment :: Console
12
12
  Classifier: Intended Audience :: Developers
13
13
  Classifier: Programming Language :: Python :: 3
@@ -19,7 +19,12 @@ Description-Content-Type: text/markdown
19
19
  # asmdiff
20
20
  ## per-function assembly comparison for paired C implementations
21
21
 
22
- > asmdiff is a command-line tool for comparing the generated assembly of individual C functions across implementations, compiler flags, compiler versions, and source revisions. It is intended for investigating compiler code generation rather than benchmarking runtime performance.
22
+ > asmdiff is a stdlib-only command-line tool for comparing the generated assembly of individual C functions across implementations, compiler flags, compiler versions, and source revisions. It is intended for investigating compiler code generation rather than benchmarking runtime performance.
23
+
24
+ ### Try it yourself:
25
+
26
+ `$ uvx asmdiff` / `$ pipx run asmdiff`
27
+ ---
23
28
 
24
29
  `asmdiff.py` answers one question fast: **when I rewrite a C construct, what
25
30
  does the compiler actually emit - before and after?** It compiles a small
@@ -28,12 +33,9 @@ assembly, and prints side-by-side listings plus a summary of instruction
28
33
  counts, outbound calls, and loop spans.
29
34
 
30
35
  Compilers and flags are configured per project through named targets in an
31
- `asmdiff.toml` file the tool itself has no project-specific defaults and
32
- parses any GNU-as ELF assembly.
36
+ `asmdiff.toml` file; the tool itself parses any GNU-as ELF assembly.
33
37
 
34
- Its home use case: checking whether an expression that used to constant-fold
35
- (e.g. `x * exp2f(5)` → one multiply) turns into a library call (e.g.
36
- `ldexpf(x, 5)` → `jmp ldexpf@PLT`) after a "cleanup". That distinction is
38
+ Whether an expression constant-folds or turns into a library call is a distinction that is
37
39
  invisible in source review and decisive on hot paths.
38
40
 
39
41
  ## Quick start
@@ -1,7 +1,12 @@
1
1
  # asmdiff
2
2
  ## per-function assembly comparison for paired C implementations
3
3
 
4
- > asmdiff is a command-line tool for comparing the generated assembly of individual C functions across implementations, compiler flags, compiler versions, and source revisions. It is intended for investigating compiler code generation rather than benchmarking runtime performance.
4
+ > asmdiff is a stdlib-only command-line tool for comparing the generated assembly of individual C functions across implementations, compiler flags, compiler versions, and source revisions. It is intended for investigating compiler code generation rather than benchmarking runtime performance.
5
+
6
+ ### Try it yourself:
7
+
8
+ `$ uvx asmdiff` / `$ pipx run asmdiff`
9
+ ---
5
10
 
6
11
  `asmdiff.py` answers one question fast: **when I rewrite a C construct, what
7
12
  does the compiler actually emit - before and after?** It compiles a small
@@ -10,12 +15,9 @@ assembly, and prints side-by-side listings plus a summary of instruction
10
15
  counts, outbound calls, and loop spans.
11
16
 
12
17
  Compilers and flags are configured per project through named targets in an
13
- `asmdiff.toml` file the tool itself has no project-specific defaults and
14
- parses any GNU-as ELF assembly.
18
+ `asmdiff.toml` file; the tool itself parses any GNU-as ELF assembly.
15
19
 
16
- Its home use case: checking whether an expression that used to constant-fold
17
- (e.g. `x * exp2f(5)` → one multiply) turns into a library call (e.g.
18
- `ldexpf(x, 5)` → `jmp ldexpf@PLT`) after a "cleanup". That distinction is
20
+ Whether an expression constant-folds or turns into a library call is a distinction that is
19
21
  invisible in source review and decisive on hot paths.
20
22
 
21
23
  ## Quick start
@@ -60,6 +60,15 @@ NOISE = re.compile(
60
60
  )
61
61
  # Compiler-generated bracketing labels that add nothing (.LFB0:, .Lfunc_end0:).
62
62
  NOISE_LABEL = re.compile(r"^\.(LFB|LFE|Lfunc_begin|Lfunc_end)\d*:")
63
+ # Data emitted *inside* a function body: switch jump tables (.long/.word
64
+ # entries), inline constants, strings. These are not instructions, so they
65
+ # must not be counted; and a self-relative table entry (".long .L5-.L4")
66
+ # references its base label from below, which the loop-span scan would
67
+ # otherwise read as a backward branch and report as a phantom loop.
68
+ DATA = re.compile(
69
+ r"^\.(long|quad|word|hword|short|byte|[248]byte|value|zero|octa|"
70
+ r"string|ascii|asciz|single|double|float|dc(\.[abwlq])?)\b"
71
+ )
63
72
 
64
73
 
65
74
  def extract_functions(asm_text):
@@ -67,9 +76,10 @@ def extract_functions(asm_text):
67
76
 
68
77
  A function body runs from its column-0 label to the matching .size
69
78
  directive (gcc and clang both emit one on ELF) or the next function
70
- label. Comment lines, CFI/section/alignment directives, and
71
- compiler bracketing labels are dropped; instructions and meaningful
72
- local labels (loop targets) are kept, whitespace-stripped.
79
+ label. Comment lines, CFI/section/alignment directives, compiler
80
+ bracketing labels, and inline data (switch jump tables, constants) are
81
+ dropped; instructions and meaningful local labels (loop targets) are
82
+ kept, whitespace-stripped.
73
83
  """
74
84
  funcs = {}
75
85
  current = None
@@ -87,7 +97,7 @@ def extract_functions(asm_text):
87
97
  line = raw.strip()
88
98
  if not line or line.startswith(("#", "//")):
89
99
  continue
90
- if NOISE.match(line) or NOISE_LABEL.match(line):
100
+ if NOISE.match(line) or NOISE_LABEL.match(line) or DATA.match(line):
91
101
  continue
92
102
  funcs[current].append(line)
93
103
  return funcs
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "asmdiff"
7
- version = "0.1.0"
7
+ version = "0.1.1"
8
8
  description = "Compare per-function assembly between paired C implementations"
9
9
  readme = "README.md"
10
10
  license = "MIT"
@@ -13,7 +13,11 @@ requires-python = ">=3.8"
13
13
  authors = [
14
14
  { name = "Rasmus Tikkanen" },
15
15
  ]
16
- keywords = ["assembly", "compiler", "gcc", "clang", "codegen", "disassembly"]
16
+ keywords = [
17
+ "assembly", "compiler", "gcc", "clang", "disassembly", "codegen",
18
+ "diff", "static-analysis", "embedded", "firmware", "dsp",
19
+ "xtensa", "arm", "riscv", "cli", "devtools", "loop-analysis", "cfg"
20
+ ]
17
21
  classifiers = [
18
22
  "Environment :: Console",
19
23
  "Intended Audience :: Developers",
@@ -74,6 +74,77 @@ looper:
74
74
  \t.size\tlooper, .-looper
75
75
  """
76
76
 
77
+ # A switch lowered to a jump table, faithful to `gcc -O2 -S`. The table is
78
+ # emitted *inside* the function body (between the label and `.size`) via a
79
+ # .rodata/.text toggle, with self-relative entries `.long .Lx-.L4` — data,
80
+ # not instructions, and their .L4 operand must not read as a backward branch.
81
+ SWITCH_ASM = """\
82
+ \t.globl\tsel
83
+ \t.type\tsel, @function
84
+ sel:
85
+ .LFB0:
86
+ \t.cfi_startproc
87
+ \tendbr64
88
+ \tcmpl\t$4, %edi
89
+ \tja\t.L9
90
+ \tleaq\t.L4(%rip), %rcx
91
+ \tmovl\t%edi, %edi
92
+ \tmovslq\t(%rcx,%rdi,4), %rax
93
+ \taddq\t%rcx, %rax
94
+ \tnotrack jmp\t*%rax
95
+ \t.section\t.rodata
96
+ \t.align 4
97
+ .L4:
98
+ \t.long\t.L8-.L4
99
+ \t.long\t.L7-.L4
100
+ \t.long\t.L6-.L4
101
+ \t.long\t.L5-.L4
102
+ \t.long\t.L3-.L4
103
+ \t.text
104
+ \t.p2align 4,,10
105
+ .L5:
106
+ \tmovl\t%esi, %eax
107
+ \txorl\t%edx, %eax
108
+ \tret
109
+ .L3:
110
+ \tmovl\t%esi, %eax
111
+ \torl\t%edx, %eax
112
+ \tret
113
+ .L8:
114
+ \tleal\t(%rsi,%rdx), %eax
115
+ \tret
116
+ .L7:
117
+ \tmovl\t%esi, %eax
118
+ \tsubl\t%edx, %eax
119
+ \tret
120
+ .L6:
121
+ \tmovl\t%esi, %eax
122
+ \timull\t%edx, %eax
123
+ \tret
124
+ \t.cfi_endproc
125
+ \t.size\tsel, .-sel
126
+ """
127
+
128
+ # Jump tables on other targets use plain (non-self-relative) label entries,
129
+ # plus stray inline constants; all are data directives, none are branches.
130
+ DATA_DIRECTIVES_ASM = """\
131
+ \t.type\ttbl, @function
132
+ tbl:
133
+ \t.cfi_startproc
134
+ \tjx\ta8
135
+ .Ltab:
136
+ \t.word\t.La
137
+ \t.word\t.Lb
138
+ \t.byte\t3
139
+ \t.quad\t0
140
+ .La:
141
+ \tadd.n\ta2, a2, a2
142
+ \tretw.n
143
+ .Lb:
144
+ \tretw.n
145
+ \t.size\ttbl, .-tbl
146
+ """
147
+
77
148
 
78
149
  class TestExtractFunctions(unittest.TestCase):
79
150
  def test_gcc_functions_found(self):
@@ -173,6 +244,37 @@ class TestLoopSpans(unittest.TestCase):
173
244
  self.assertEqual(asmdiff.loop_spans(lines), [])
174
245
 
175
246
 
247
+ class TestJumpTableData(unittest.TestCase):
248
+ """Inline data (switch jump tables, constants) emitted inside a function
249
+ body is not counted as instructions and never reads as a loop span."""
250
+
251
+ def test_table_entries_stripped_from_body(self):
252
+ body = asmdiff.extract_functions(SWITCH_ASM)["sel"]
253
+ self.assertFalse(any(".long" in line for line in body))
254
+ self.assertIn(".L4:", body) # the table's anchor label is kept
255
+
256
+ def test_table_entries_not_counted_as_instructions(self):
257
+ body = asmdiff.extract_functions(SWITCH_ASM)["sel"]
258
+ insns, calls = asmdiff.analyze(body)
259
+ self.assertEqual(insns, 22) # 27 before the fix (5 .long entries)
260
+ self.assertEqual(calls, []) # notrack jmp *%rax is not a call
261
+
262
+ def test_self_relative_table_is_not_a_phantom_span(self):
263
+ # `.long .L5-.L4` references the table base .L4 from below; without
264
+ # stripping, that reads as a backward branch and invents a loop.
265
+ body = asmdiff.extract_functions(SWITCH_ASM)["sel"]
266
+ self.assertEqual(asmdiff.loop_spans(body), [])
267
+
268
+ def test_various_data_directives_stripped(self):
269
+ # .word/.byte/.quad jump tables and constants on other targets.
270
+ body = asmdiff.extract_functions(DATA_DIRECTIVES_ASM)["tbl"]
271
+ for directive in (".word", ".byte", ".quad"):
272
+ self.assertFalse(any(directive in line for line in body), directive)
273
+ insns, _ = asmdiff.analyze(body)
274
+ self.assertEqual(insns, 4) # 8 before the fix (4 data entries)
275
+ self.assertEqual(asmdiff.loop_spans(body), [])
276
+
277
+
176
278
  class TestAutoPairs(unittest.TestCase):
177
279
  def test_pairs_by_convention(self):
178
280
  names = ["old_const", "new_const", "old_rt", "new_rt", "helper"]
@@ -1,8 +0,0 @@
1
- {
2
- "permissions": {
3
- "allow": [
4
- "Read(//home/fatta/esp-idf/amy/tools/**)",
5
- "Read(//home/fatta/esp-idf/amy/**)"
6
- ]
7
- }
8
- }
File without changes
File without changes
File without changes