ingero-annotate 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ingero_annotate-0.1.0/.gitignore +61 -0
- ingero_annotate-0.1.0/LICENSE +201 -0
- ingero_annotate-0.1.0/PKG-INFO +112 -0
- ingero_annotate-0.1.0/README.md +85 -0
- ingero_annotate-0.1.0/ingero_annotate.py +296 -0
- ingero_annotate-0.1.0/pyproject.toml +45 -0
- ingero_annotate-0.1.0/tests/test_e2e_install_and_use.py +447 -0
- ingero_annotate-0.1.0/tests/test_no_vendored_copies.py +86 -0
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
# Binaries
|
|
2
|
+
/bin/
|
|
3
|
+
/ingero
|
|
4
|
+
*.a
|
|
5
|
+
|
|
6
|
+
# eBPF generated. The committed *_bpfel.go and *_bpfel.o pairs ARE
|
|
7
|
+
# release artifacts; CI and goreleaser embed them. The committed
|
|
8
|
+
# bpf/headers/vmlinux.h is the canonical type catalog the per-arch
|
|
9
|
+
# CO-RE compile reads (see Makefile `generate` target). It is host-
|
|
10
|
+
# arch independent — CO-RE relocates field accesses against the
|
|
11
|
+
# loaded kernel BTF at runtime, not the headers used at compile time.
|
|
12
|
+
|
|
13
|
+
# Go
|
|
14
|
+
vendor/
|
|
15
|
+
|
|
16
|
+
# Build output
|
|
17
|
+
dist/
|
|
18
|
+
.output/
|
|
19
|
+
|
|
20
|
+
# IDE
|
|
21
|
+
.vscode/
|
|
22
|
+
.idea/
|
|
23
|
+
*.swp
|
|
24
|
+
*.swo
|
|
25
|
+
*~
|
|
26
|
+
|
|
27
|
+
# OS
|
|
28
|
+
.DS_Store
|
|
29
|
+
Thumbs.db
|
|
30
|
+
|
|
31
|
+
# Secrets
|
|
32
|
+
.env
|
|
33
|
+
*.pem
|
|
34
|
+
*.key
|
|
35
|
+
|
|
36
|
+
# VM state
|
|
37
|
+
.tensordock-vm.json
|
|
38
|
+
.lambdalabs-vm*.json
|
|
39
|
+
.azure-vm.json
|
|
40
|
+
|
|
41
|
+
# Logs (session logs, test output)
|
|
42
|
+
/logs/
|
|
43
|
+
|
|
44
|
+
# Claude Code (local-only project context)
|
|
45
|
+
.claude/
|
|
46
|
+
CLAUDE.md
|
|
47
|
+
|
|
48
|
+
# Test artifacts
|
|
49
|
+
coverage.out
|
|
50
|
+
*.test
|
|
51
|
+
|
|
52
|
+
# MCP Registry publisher tokens
|
|
53
|
+
.mcpregistry_*
|
|
54
|
+
|
|
55
|
+
# Asciinema recordings (large, used to generate GIFs)
|
|
56
|
+
docs/assets/*.cast
|
|
57
|
+
|
|
58
|
+
# Python bytecode cache from the examples/ integration test suites
|
|
59
|
+
__pycache__/
|
|
60
|
+
*.pyc
|
|
61
|
+
.pytest_cache/
|
|
@@ -0,0 +1,201 @@
|
|
|
1
|
+
Apache License
|
|
2
|
+
Version 2.0, January 2004
|
|
3
|
+
http://www.apache.org/licenses/
|
|
4
|
+
|
|
5
|
+
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
|
6
|
+
|
|
7
|
+
1. Definitions.
|
|
8
|
+
|
|
9
|
+
"License" shall mean the terms and conditions for use, reproduction,
|
|
10
|
+
and distribution as defined by Sections 1 through 9 of this document.
|
|
11
|
+
|
|
12
|
+
"Licensor" shall mean the copyright owner or entity authorized by
|
|
13
|
+
the copyright owner that is granting the License.
|
|
14
|
+
|
|
15
|
+
"Legal Entity" shall mean the union of the acting entity and all
|
|
16
|
+
other entities that control, are controlled by, or are under common
|
|
17
|
+
control with that entity. For the purposes of this definition,
|
|
18
|
+
"control" means (i) the power, direct or indirect, to cause the
|
|
19
|
+
direction or management of such entity, whether by contract or
|
|
20
|
+
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
|
21
|
+
outstanding shares, or (iii) beneficial ownership of such entity.
|
|
22
|
+
|
|
23
|
+
"You" (or "Your") shall mean an individual or Legal Entity
|
|
24
|
+
exercising permissions granted by this License.
|
|
25
|
+
|
|
26
|
+
"Source" form shall mean the preferred form for making modifications,
|
|
27
|
+
including but not limited to software source code, documentation
|
|
28
|
+
source, and configuration files.
|
|
29
|
+
|
|
30
|
+
"Object" form shall mean any form resulting from mechanical
|
|
31
|
+
transformation or translation of a Source form, including but
|
|
32
|
+
not limited to compiled object code, generated documentation,
|
|
33
|
+
and conversions to other media types.
|
|
34
|
+
|
|
35
|
+
"Work" shall mean the work of authorship, whether in Source or
|
|
36
|
+
Object form, made available under the License, as indicated by a
|
|
37
|
+
copyright notice that is included in or attached to the work
|
|
38
|
+
(an example is provided in the Appendix below).
|
|
39
|
+
|
|
40
|
+
"Derivative Works" shall mean any work, whether in Source or Object
|
|
41
|
+
form, that is based on (or derived from) the Work and for which the
|
|
42
|
+
editorial revisions, annotations, elaborations, or other modifications
|
|
43
|
+
represent, as a whole, an original work of authorship. For the purposes
|
|
44
|
+
of this License, Derivative Works shall not include works that remain
|
|
45
|
+
separable from, or merely link (or bind by name) to the interfaces of,
|
|
46
|
+
the Work and Derivative Works thereof.
|
|
47
|
+
|
|
48
|
+
"Contribution" shall mean any work of authorship, including
|
|
49
|
+
the original version of the Work and any modifications or additions
|
|
50
|
+
to that Work or Derivative Works thereof, that is intentionally
|
|
51
|
+
submitted to Licensor for inclusion in the Work by the copyright owner
|
|
52
|
+
or by an individual or Legal Entity authorized to submit on behalf of
|
|
53
|
+
the copyright owner. For the purposes of this definition, "submitted"
|
|
54
|
+
means any form of electronic, verbal, or written communication sent
|
|
55
|
+
to the Licensor or its representatives, including but not limited to
|
|
56
|
+
communication on electronic mailing lists, source code control systems,
|
|
57
|
+
and issue tracking systems that are managed by, or on behalf of, the
|
|
58
|
+
Licensor for the purpose of discussing and improving the Work, but
|
|
59
|
+
excluding communication that is conspicuously marked or otherwise
|
|
60
|
+
designated in writing by the copyright owner as "Not a Contribution."
|
|
61
|
+
|
|
62
|
+
"Contributor" shall mean Licensor and any individual or Legal Entity
|
|
63
|
+
on behalf of whom a Contribution has been received by Licensor and
|
|
64
|
+
subsequently incorporated within the Work.
|
|
65
|
+
|
|
66
|
+
2. Grant of Copyright License. Subject to the terms and conditions of
|
|
67
|
+
this License, each Contributor hereby grants to You a perpetual,
|
|
68
|
+
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
|
69
|
+
copyright license to reproduce, prepare Derivative Works of,
|
|
70
|
+
publicly display, publicly perform, sublicense, and distribute the
|
|
71
|
+
Work and such Derivative Works in Source or Object form.
|
|
72
|
+
|
|
73
|
+
3. Grant of Patent License. Subject to the terms and conditions of
|
|
74
|
+
this License, each Contributor hereby grants to You a perpetual,
|
|
75
|
+
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
|
76
|
+
(except as stated in this section) patent license to make, have made,
|
|
77
|
+
use, offer to sell, sell, import, and otherwise transfer the Work,
|
|
78
|
+
where such license applies only to those patent claims licensable
|
|
79
|
+
by such Contributor that are necessarily infringed by their
|
|
80
|
+
Contribution(s) alone or by combination of their Contribution(s)
|
|
81
|
+
with the Work to which such Contribution(s) was submitted. If You
|
|
82
|
+
institute patent litigation against any entity (including a
|
|
83
|
+
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
|
84
|
+
or a Contribution incorporated within the Work constitutes direct
|
|
85
|
+
or contributory patent infringement, then any patent licenses
|
|
86
|
+
granted to You under this License for that Work shall terminate
|
|
87
|
+
as of the date such litigation is filed.
|
|
88
|
+
|
|
89
|
+
4. Redistribution. You may reproduce and distribute copies of the
|
|
90
|
+
Work or Derivative Works thereof in any medium, with or without
|
|
91
|
+
modifications, and in Source or Object form, provided that You
|
|
92
|
+
meet the following conditions:
|
|
93
|
+
|
|
94
|
+
(a) You must give any other recipients of the Work or
|
|
95
|
+
Derivative Works a copy of this License; and
|
|
96
|
+
|
|
97
|
+
(b) You must cause any modified files to carry prominent notices
|
|
98
|
+
stating that You changed the files; and
|
|
99
|
+
|
|
100
|
+
(c) You must retain, in the Source form of any Derivative Works
|
|
101
|
+
that You distribute, all copyright, patent, trademark, and
|
|
102
|
+
attribution notices from the Source form of the Work,
|
|
103
|
+
excluding those notices that do not pertain to any part of
|
|
104
|
+
the Derivative Works; and
|
|
105
|
+
|
|
106
|
+
(d) If the Work includes a "NOTICE" text file as part of its
|
|
107
|
+
distribution, then any Derivative Works that You distribute must
|
|
108
|
+
include a readable copy of the attribution notices contained
|
|
109
|
+
within such NOTICE file, excluding those notices that do not
|
|
110
|
+
pertain to any part of the Derivative Works, in at least one
|
|
111
|
+
of the following places: within a NOTICE text file distributed
|
|
112
|
+
as part of the Derivative Works; within the Source form or
|
|
113
|
+
documentation, if provided along with the Derivative Works; or,
|
|
114
|
+
within a display generated by the Derivative Works, if and
|
|
115
|
+
wherever such third-party notices normally appear. The contents
|
|
116
|
+
of the NOTICE file are for informational purposes only and
|
|
117
|
+
do not modify the License. You may add Your own attribution
|
|
118
|
+
notices within Derivative Works that You distribute, alongside
|
|
119
|
+
or as an addendum to the NOTICE text from the Work, provided
|
|
120
|
+
that such additional attribution notices cannot be construed
|
|
121
|
+
as modifying the License.
|
|
122
|
+
|
|
123
|
+
You may add Your own copyright statement to Your modifications and
|
|
124
|
+
may provide additional or different license terms and conditions
|
|
125
|
+
for use, reproduction, or distribution of Your modifications, or
|
|
126
|
+
for any such Derivative Works as a whole, provided Your use,
|
|
127
|
+
reproduction, and distribution of the Work otherwise complies with
|
|
128
|
+
the conditions stated in this License.
|
|
129
|
+
|
|
130
|
+
5. Submission of Contributions. Unless You explicitly state otherwise,
|
|
131
|
+
any Contribution intentionally submitted for inclusion in the Work
|
|
132
|
+
by You to the Licensor shall be under the terms and conditions of
|
|
133
|
+
this License, without any additional terms or conditions.
|
|
134
|
+
Notwithstanding the above, nothing herein shall supersede or modify
|
|
135
|
+
the terms of any separate license agreement you may have executed
|
|
136
|
+
with Licensor regarding such Contributions.
|
|
137
|
+
|
|
138
|
+
6. Trademarks. This License does not grant permission to use the trade
|
|
139
|
+
names, trademarks, service marks, or product names of the Licensor,
|
|
140
|
+
except as required for reasonable and customary use in describing the
|
|
141
|
+
origin of the Work and reproducing the content of the NOTICE file.
|
|
142
|
+
|
|
143
|
+
7. Disclaimer of Warranty. Unless required by applicable law or
|
|
144
|
+
agreed to in writing, Licensor provides the Work (and each
|
|
145
|
+
Contributor provides its Contributions) on an "AS IS" BASIS,
|
|
146
|
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
|
147
|
+
implied, including, without limitation, any warranties or conditions
|
|
148
|
+
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
|
149
|
+
PARTICULAR PURPOSE. You are solely responsible for determining the
|
|
150
|
+
appropriateness of using or redistributing the Work and assume any
|
|
151
|
+
risks associated with Your exercise of permissions under this License.
|
|
152
|
+
|
|
153
|
+
8. Limitation of Liability. In no event and under no legal theory,
|
|
154
|
+
whether in tort (including negligence), contract, or otherwise,
|
|
155
|
+
unless required by applicable law (such as deliberate and grossly
|
|
156
|
+
negligent acts) or agreed to in writing, shall any Contributor be
|
|
157
|
+
liable to You for damages, including any direct, indirect, special,
|
|
158
|
+
incidental, or consequential damages of any character arising as a
|
|
159
|
+
result of this License or out of the use or inability to use the
|
|
160
|
+
Work (including but not limited to damages for loss of goodwill,
|
|
161
|
+
work stoppage, computer failure or malfunction, or any and all
|
|
162
|
+
other commercial damages or losses), even if such Contributor
|
|
163
|
+
has been advised of the possibility of such damages.
|
|
164
|
+
|
|
165
|
+
9. Accepting Warranty or Additional Liability. While redistributing
|
|
166
|
+
the Work or Derivative Works thereof, You may choose to offer,
|
|
167
|
+
and charge a fee for, acceptance of support, warranty, indemnity,
|
|
168
|
+
or other liability obligations and/or rights consistent with this
|
|
169
|
+
License. However, in accepting such obligations, You may act only
|
|
170
|
+
on Your own behalf and on Your sole responsibility, not on behalf
|
|
171
|
+
of any other Contributor, and only if You agree to indemnify,
|
|
172
|
+
defend, and hold each Contributor harmless for any liability
|
|
173
|
+
incurred by, or claims asserted against, such Contributor by reason
|
|
174
|
+
of your accepting any such warranty or additional liability.
|
|
175
|
+
|
|
176
|
+
END OF TERMS AND CONDITIONS
|
|
177
|
+
|
|
178
|
+
APPENDIX: How to apply the Apache License to your work.
|
|
179
|
+
|
|
180
|
+
To apply the Apache License to your work, attach the following
|
|
181
|
+
boilerplate notice, with the fields enclosed by brackets "[]"
|
|
182
|
+
replaced with your own identifying information. (Don't include
|
|
183
|
+
the brackets!) The text should be enclosed in the appropriate
|
|
184
|
+
comment syntax for the file format. We also recommend that a
|
|
185
|
+
file or class name and description of purpose be included on the
|
|
186
|
+
same "printed page" as the copyright notice for easier
|
|
187
|
+
identification within third-party archives.
|
|
188
|
+
|
|
189
|
+
Copyright [yyyy] [name of copyright owner]
|
|
190
|
+
|
|
191
|
+
Licensed under the Apache License, Version 2.0 (the "License");
|
|
192
|
+
you may not use this file except in compliance with the License.
|
|
193
|
+
You may obtain a copy of the License at
|
|
194
|
+
|
|
195
|
+
http://www.apache.org/licenses/LICENSE-2.0
|
|
196
|
+
|
|
197
|
+
Unless required by applicable law or agreed to in writing, software
|
|
198
|
+
distributed under the License is distributed on an "AS IS" BASIS,
|
|
199
|
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
200
|
+
See the License for the specific language governing permissions and
|
|
201
|
+
limitations under the License.
|
|
@@ -0,0 +1,112 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: ingero-annotate
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Writer for the Ingero agent annotation socket. Speaks the agent v0.17+ NDJSON annotation protocol; framework-agnostic.
|
|
5
|
+
Project-URL: Homepage, https://github.com/ingero-io/ingero
|
|
6
|
+
Project-URL: Repository, https://github.com/ingero-io/ingero
|
|
7
|
+
Project-URL: Documentation, https://github.com/ingero-io/ingero/blob/main/docs/commands.md
|
|
8
|
+
Project-URL: Issues, https://github.com/ingero-io/ingero/issues
|
|
9
|
+
License-Expression: Apache-2.0
|
|
10
|
+
License-File: LICENSE
|
|
11
|
+
Keywords: annotation,ebpf,gpu,ingero,observability
|
|
12
|
+
Classifier: Development Status :: 4 - Beta
|
|
13
|
+
Classifier: Intended Audience :: Developers
|
|
14
|
+
Classifier: Intended Audience :: System Administrators
|
|
15
|
+
Classifier: License :: OSI Approved :: Apache Software License
|
|
16
|
+
Classifier: Operating System :: POSIX :: Linux
|
|
17
|
+
Classifier: Programming Language :: Python :: 3
|
|
18
|
+
Classifier: Programming Language :: Python :: 3 :: Only
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.8
|
|
20
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
21
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
22
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
23
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
24
|
+
Classifier: Topic :: System :: Monitoring
|
|
25
|
+
Requires-Python: >=3.8
|
|
26
|
+
Description-Content-Type: text/markdown
|
|
27
|
+
|
|
28
|
+
# ingero-annotate
|
|
29
|
+
|
|
30
|
+
Writer for the Ingero agent annotation socket. Speaks the agent's
|
|
31
|
+
v0.17 NDJSON annotation protocol and nothing else, so any training
|
|
32
|
+
framework or inference frontend can inject `step`, `epoch`,
|
|
33
|
+
`task_id`, `request_id`, `model`, or any custom label into a live
|
|
34
|
+
recorded trace without a framework-specific dependency.
|
|
35
|
+
|
|
36
|
+
## Install
|
|
37
|
+
|
|
38
|
+
```
|
|
39
|
+
pip install ingero-annotate
|
|
40
|
+
```
|
|
41
|
+
|
|
42
|
+
## Use
|
|
43
|
+
|
|
44
|
+
```python
|
|
45
|
+
from ingero_annotate import AnnotationWriter
|
|
46
|
+
|
|
47
|
+
writer = AnnotationWriter() # default socket: /run/ingero/annotate.sock
|
|
48
|
+
writer.write({"step": "42"}) # instant annotation
|
|
49
|
+
writer.write_span({"epoch": "3"}, ...) # span annotation
|
|
50
|
+
```
|
|
51
|
+
|
|
52
|
+
The full public surface is documented in the module's docstring;
|
|
53
|
+
see `python/ingero-annotate/ingero_annotate.py`.
|
|
54
|
+
|
|
55
|
+
The agent must be running with the annotation socket bound
|
|
56
|
+
(`ingero trace --record --annotate`). When the socket is absent or
|
|
57
|
+
the agent has not bound it, the writer drops silently and the
|
|
58
|
+
caller's code path is untouched.
|
|
59
|
+
|
|
60
|
+
## What this is and is not
|
|
61
|
+
|
|
62
|
+
This is the protocol layer. It validates label keys and values
|
|
63
|
+
against the agent's contract (`pkg/contract/annotate.go` in the
|
|
64
|
+
agent repo), opens the Unix-domain socket, sends NDJSON, handles
|
|
65
|
+
reconnect, and gives you back a small Python API. It has no
|
|
66
|
+
framework dependency.
|
|
67
|
+
|
|
68
|
+
The framework adapters live in the Ingero agent repository under
|
|
69
|
+
[`examples/integrations/`](https://github.com/ingero-io/ingero/tree/main/examples/integrations):
|
|
70
|
+
|
|
71
|
+
- `pytorch-lightning/ingero_lightning.py` (Lightning callback)
|
|
72
|
+
- `ray/ingero_ray.py` (Ray task hook)
|
|
73
|
+
- `hf-trainer/ingero_hf.py` (HF Trainer callback)
|
|
74
|
+
- `deepspeed/ingero_deepspeed.py` (DeepSpeed wrapper)
|
|
75
|
+
- `accelerate/ingero_accelerate.py` (Accelerate hook)
|
|
76
|
+
- `vllm/ingero_vllm.py` (per-request inference emitter, v0.19+)
|
|
77
|
+
|
|
78
|
+
Each of those imports from this package and adds the
|
|
79
|
+
framework-specific wiring.
|
|
80
|
+
|
|
81
|
+
## Wire protocol summary
|
|
82
|
+
|
|
83
|
+
Each line is one NDJSON annotation object:
|
|
84
|
+
|
|
85
|
+
```json
|
|
86
|
+
{"labels": {"step": "42"}, "pid": 1234, "ts": 1700000000000000000}
|
|
87
|
+
```
|
|
88
|
+
|
|
89
|
+
`labels` is required. `pid` scopes the annotation to a process incarnation; without
|
|
90
|
+
it the annotation is trace-wide. `ts` is optional unix nanoseconds
|
|
91
|
+
(the agent stamps receive time when absent). Spans add `span_start`
|
|
92
|
+
and `span_end` (both unix nanoseconds).
|
|
93
|
+
|
|
94
|
+
The full contract, including label-key/value charsets, length
|
|
95
|
+
limits, and the v0.19 per-request keys, is documented in
|
|
96
|
+
`pkg/contract/annotate.go` and `docs/commands.md` in the agent
|
|
97
|
+
repository.
|
|
98
|
+
|
|
99
|
+
## Honesty notes for per-request inference correlation
|
|
100
|
+
|
|
101
|
+
v0.19+ adds `request_id`, `model`, `prompt_len`, `output_len`,
|
|
102
|
+
`preempted`, `arrival`, `first_token`, `finished` to the contract.
|
|
103
|
+
When used with `ingero explain --by-request` / `ingero query
|
|
104
|
+
--by-request`, the output is a TIME-OVERLAP slice, not exclusive
|
|
105
|
+
kernel ownership. Continuous batching shares kernel launches across
|
|
106
|
+
many in-flight requests; the agent's renderer prints this caveat
|
|
107
|
+
verbatim every time. The library does not interpret `request_id`
|
|
108
|
+
semantics beyond label validation; the agent does.
|
|
109
|
+
|
|
110
|
+
## License
|
|
111
|
+
|
|
112
|
+
Apache-2.0.
|
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
# ingero-annotate
|
|
2
|
+
|
|
3
|
+
Writer for the Ingero agent annotation socket. Speaks the agent's
|
|
4
|
+
v0.17 NDJSON annotation protocol and nothing else, so any training
|
|
5
|
+
framework or inference frontend can inject `step`, `epoch`,
|
|
6
|
+
`task_id`, `request_id`, `model`, or any custom label into a live
|
|
7
|
+
recorded trace without a framework-specific dependency.
|
|
8
|
+
|
|
9
|
+
## Install
|
|
10
|
+
|
|
11
|
+
```
|
|
12
|
+
pip install ingero-annotate
|
|
13
|
+
```
|
|
14
|
+
|
|
15
|
+
## Use
|
|
16
|
+
|
|
17
|
+
```python
|
|
18
|
+
from ingero_annotate import AnnotationWriter
|
|
19
|
+
|
|
20
|
+
writer = AnnotationWriter() # default socket: /run/ingero/annotate.sock
|
|
21
|
+
writer.write({"step": "42"}) # instant annotation
|
|
22
|
+
writer.write_span({"epoch": "3"}, ...) # span annotation
|
|
23
|
+
```
|
|
24
|
+
|
|
25
|
+
The full public surface is documented in the module's docstring;
|
|
26
|
+
see `python/ingero-annotate/ingero_annotate.py`.
|
|
27
|
+
|
|
28
|
+
The agent must be running with the annotation socket bound
|
|
29
|
+
(`ingero trace --record --annotate`). When the socket is absent or
|
|
30
|
+
the agent has not bound it, the writer drops silently and the
|
|
31
|
+
caller's code path is untouched.
|
|
32
|
+
|
|
33
|
+
## What this is and is not
|
|
34
|
+
|
|
35
|
+
This is the protocol layer. It validates label keys and values
|
|
36
|
+
against the agent's contract (`pkg/contract/annotate.go` in the
|
|
37
|
+
agent repo), opens the Unix-domain socket, sends NDJSON, handles
|
|
38
|
+
reconnect, and gives you back a small Python API. It has no
|
|
39
|
+
framework dependency.
|
|
40
|
+
|
|
41
|
+
The framework adapters live in the Ingero agent repository under
|
|
42
|
+
[`examples/integrations/`](https://github.com/ingero-io/ingero/tree/main/examples/integrations):
|
|
43
|
+
|
|
44
|
+
- `pytorch-lightning/ingero_lightning.py` (Lightning callback)
|
|
45
|
+
- `ray/ingero_ray.py` (Ray task hook)
|
|
46
|
+
- `hf-trainer/ingero_hf.py` (HF Trainer callback)
|
|
47
|
+
- `deepspeed/ingero_deepspeed.py` (DeepSpeed wrapper)
|
|
48
|
+
- `accelerate/ingero_accelerate.py` (Accelerate hook)
|
|
49
|
+
- `vllm/ingero_vllm.py` (per-request inference emitter, v0.19+)
|
|
50
|
+
|
|
51
|
+
Each of those imports from this package and adds the
|
|
52
|
+
framework-specific wiring.
|
|
53
|
+
|
|
54
|
+
## Wire protocol summary
|
|
55
|
+
|
|
56
|
+
Each line is one NDJSON annotation object:
|
|
57
|
+
|
|
58
|
+
```json
|
|
59
|
+
{"labels": {"step": "42"}, "pid": 1234, "ts": 1700000000000000000}
|
|
60
|
+
```
|
|
61
|
+
|
|
62
|
+
`labels` is required. `pid` scopes the annotation to a process incarnation; without
|
|
63
|
+
it the annotation is trace-wide. `ts` is optional unix nanoseconds
|
|
64
|
+
(the agent stamps receive time when absent). Spans add `span_start`
|
|
65
|
+
and `span_end` (both unix nanoseconds).
|
|
66
|
+
|
|
67
|
+
The full contract, including label-key/value charsets, length
|
|
68
|
+
limits, and the v0.19 per-request keys, is documented in
|
|
69
|
+
`pkg/contract/annotate.go` and `docs/commands.md` in the agent
|
|
70
|
+
repository.
|
|
71
|
+
|
|
72
|
+
## Honesty notes for per-request inference correlation
|
|
73
|
+
|
|
74
|
+
v0.19+ adds `request_id`, `model`, `prompt_len`, `output_len`,
|
|
75
|
+
`preempted`, `arrival`, `first_token`, `finished` to the contract.
|
|
76
|
+
When used with `ingero explain --by-request` / `ingero query
|
|
77
|
+
--by-request`, the output is a TIME-OVERLAP slice, not exclusive
|
|
78
|
+
kernel ownership. Continuous batching shares kernel launches across
|
|
79
|
+
many in-flight requests; the agent's renderer prints this caveat
|
|
80
|
+
verbatim every time. The library does not interpret `request_id`
|
|
81
|
+
semantics beyond label validation; the agent does.
|
|
82
|
+
|
|
83
|
+
## License
|
|
84
|
+
|
|
85
|
+
Apache-2.0.
|
|
@@ -0,0 +1,296 @@
|
|
|
1
|
+
"""Framework-agnostic writer for the Ingero agent annotation socket.
|
|
2
|
+
|
|
3
|
+
This module carries no PyTorch / Lightning dependency. It speaks the
|
|
4
|
+
agent's NDJSON annotation protocol (agent v0.17.0) and nothing else, so
|
|
5
|
+
the protocol and socket behaviour can be unit-tested without importing a
|
|
6
|
+
training framework.
|
|
7
|
+
|
|
8
|
+
Wire protocol (see pkg/contract/annotate.go in the agent repo):
|
|
9
|
+
|
|
10
|
+
- The agent, when run as `ingero trace --record --annotate`, binds a
|
|
11
|
+
Unix-domain socket at /run/ingero/annotate.sock (or, when /run is not
|
|
12
|
+
writable, ~/.ingero/annotate/annotate.sock).
|
|
13
|
+
- A writer connects and sends newline-delimited JSON. Each line is one
|
|
14
|
+
annotation object:
|
|
15
|
+
|
|
16
|
+
{"labels": {"step": "42"}, "pid": 1234, "ts": 1700000000000000000}
|
|
17
|
+
|
|
18
|
+
- `labels` is required and non-empty. `pid` scopes the annotation to a
|
|
19
|
+
process incarnation. `ts` is optional unix nanoseconds; the agent
|
|
20
|
+
stamps receive time when it is absent.
|
|
21
|
+
|
|
22
|
+
Validation limits below mirror the agent's contract exactly. A line that
|
|
23
|
+
violates them is rejected by the agent without dropping the listener;
|
|
24
|
+
this writer rejects it locally first so a caller sees a clear error.
|
|
25
|
+
"""
|
|
26
|
+
|
|
27
|
+
from __future__ import annotations
|
|
28
|
+
|
|
29
|
+
import json
|
|
30
|
+
import logging
|
|
31
|
+
import os
|
|
32
|
+
import socket
|
|
33
|
+
import threading
|
|
34
|
+
import time
|
|
35
|
+
|
|
36
|
+
logger = logging.getLogger("ingero.annotate")
|
|
37
|
+
|
|
38
|
+
# --- Protocol constants, pinned to pkg/contract/annotate.go ----------------
|
|
39
|
+
|
|
40
|
+
ANNOTATION_PROTOCOL_VERSION = 1
|
|
41
|
+
ANNOTATION_SOCKET_NAME = "annotate.sock"
|
|
42
|
+
ANNOTATION_SOCKET_DIR = "/run/ingero"
|
|
43
|
+
|
|
44
|
+
MAX_LABEL_KEY_LEN = 64
|
|
45
|
+
MAX_LABEL_VALUE_LEN = 256
|
|
46
|
+
MAX_LABELS_PER_ANNOTATION = 32
|
|
47
|
+
MAX_LINE_BYTES = 16 * 1024
|
|
48
|
+
|
|
49
|
+
# Well-known label keys the distribution integrations standardize on.
|
|
50
|
+
KEY_STEP = "step"
|
|
51
|
+
KEY_EPOCH = "epoch"
|
|
52
|
+
KEY_TASK_ID = "task_id"
|
|
53
|
+
KEY_PHASE = "phase"
|
|
54
|
+
KEY_RUN_ID = "run_id"
|
|
55
|
+
|
|
56
|
+
# Allowed label-key bytes: ASCII letters, digits, underscore, dot, hyphen.
|
|
57
|
+
_KEY_CHARS = frozenset(
|
|
58
|
+
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_.-"
|
|
59
|
+
)
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
class AnnotationError(ValueError):
    """Signal that an annotation breaks one of the agent's contract limits.

    Subclasses ValueError, so callers that already catch ValueError for
    bad input keep working.
    """
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def is_valid_label_key(key: str) -> bool:
    """Report whether key satisfies the agent's label-key contract.

    A valid key is non-empty, at most MAX_LABEL_KEY_LEN bytes long, and
    built only from the [A-Za-z0-9_.-] charset. Mirrors
    contract.IsValidAnnotationLabelKey.

    The key is never UTF-8 encoded here. Every allowed character is
    ASCII, so once the charset check passes the character count equals
    the byte count; and a key with more characters than the limit is
    necessarily over the byte limit too. Encoding first would raise
    UnicodeEncodeError on a key holding a lone surrogate instead of
    reporting it as invalid.
    """
    # Cheap rejections first: empty, or already longer than the byte
    # limit even at one byte per character.
    if not key or len(key) > MAX_LABEL_KEY_LEN:
        return False
    # Any non-ASCII (multi-byte) character is outside the charset, so
    # passing this check also guarantees the byte length is within limit.
    return all(c in _KEY_CHARS for c in key)
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
def validate_labels(labels: dict) -> None:
    """Validate a label map against the agent contract.

    Checks, in order: the map is non-empty; it holds at most
    MAX_LABELS_PER_ANNOTATION entries; every key and value is a str;
    every key passes is_valid_label_key; every value is at most
    MAX_LABEL_VALUE_LEN bytes of UTF-8 and contains no control character
    (below 0x20, or 0x7F). Mirrors the checks in
    pkg/annotate/annotation.go Validate().

    Raises AnnotationError on the first violation. A value that cannot
    be encoded as UTF-8 at all (for example one holding a lone surrogate)
    is reported as an AnnotationError as well, rather than leaking a
    UnicodeEncodeError to the caller.
    """
    if not labels:
        raise AnnotationError("annotation has no labels")
    if len(labels) > MAX_LABELS_PER_ANNOTATION:
        raise AnnotationError(
            f"annotation has {len(labels)} labels, max {MAX_LABELS_PER_ANNOTATION}"
        )
    for key, value in labels.items():
        if not isinstance(key, str) or not isinstance(value, str):
            raise AnnotationError("label keys and values must be strings")
        if not is_valid_label_key(key):
            raise AnnotationError(
                f"invalid label key {key!r} (charset A-Za-z0-9_.-, "
                f"max {MAX_LABEL_KEY_LEN} bytes)"
            )
        # The limit is in bytes, not characters, so measure the encoded
        # form. Encoding can itself fail; surface that as a contract
        # violation so callers only ever have to catch AnnotationError.
        try:
            vbytes = value.encode("utf-8")
        except UnicodeEncodeError as exc:
            raise AnnotationError(
                f"label {key!r} value cannot be encoded as UTF-8"
            ) from exc
        if len(vbytes) > MAX_LABEL_VALUE_LEN:
            raise AnnotationError(
                f"label {key!r} value is {len(vbytes)} bytes, "
                f"max {MAX_LABEL_VALUE_LEN}"
            )
        for ch in value:
            o = ord(ch)
            # C0 control characters and DEL are rejected.
            if o < 0x20 or o == 0x7F:
                raise AnnotationError(
                    f"label {key!r} value contains a control character "
                    f"(0x{o:02x})"
                )
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
def encode_annotation(labels: dict, pid: int | None = None,
                      ts_ns: int | None = None) -> bytes:
    """Serialize one annotation into a newline-terminated NDJSON line.

    The labels are validated against the contract before anything is
    encoded. `pid` and `ts_ns`, when given, are coerced with int() and
    emitted as the "pid" and "ts" fields; when None they are left out.
    The finished line, trailing newline included, must fit within
    MAX_LINE_BYTES. Raises AnnotationError on any violation.
    """
    validate_labels(labels)

    payload: dict = {"labels": labels}
    # Optional fields are only emitted when the caller supplied them.
    for field, raw in (("pid", pid), ("ts", ts_ns)):
        if raw is not None:
            payload[field] = int(raw)

    # Compact separators and sorted keys keep the wire form deterministic.
    text = json.dumps(payload, separators=(",", ":"), sort_keys=True)
    framed = text.encode("utf-8") + b"\n"
    size = len(framed)
    if size > MAX_LINE_BYTES:
        raise AnnotationError(
            f"encoded annotation is {size} bytes, max {MAX_LINE_BYTES}"
        )
    return framed
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
def default_socket_path() -> str:
    """Pick the annotation socket path the agent is expected to bind.

    The canonical /run/ingero/annotate.sock wins when a socket is
    actually present there (the privileged-trace case). In every other
    case the result is ~/.ingero/annotate/annotate.sock, mirroring the
    agent's resolveSocketDir / SocketPath logic for the
    unprivileged-trace case.
    """
    canonical = os.path.join(ANNOTATION_SOCKET_DIR, ANNOTATION_SOCKET_NAME)
    try:
        # Advisory probe only: it merely picks the more likely path. The
        # real guard is _connect's OSError handling, which copes with the
        # socket vanishing between this probe and the connect. Do not
        # "harden" this into a stricter pre-flight check: that would
        # introduce a TOCTOU race for no gain.
        bound = os.path.exists(canonical) and _is_socket(canonical)
    except OSError:
        bound = False
    if bound:
        return canonical

    fallback_dir = os.path.join(os.path.expanduser("~"), ".ingero", "annotate")
    return os.path.join(fallback_dir, ANNOTATION_SOCKET_NAME)
|
|
154
|
+
|
|
155
|
+
|
|
156
|
+
def _is_socket(path: str) -> bool:
    """Return True when path names a socket, without following symlinks.

    Any OSError from the lstat call (missing path, permission denied,
    ...) is treated as "not a socket" and yields False.
    """
    import stat

    try:
        mode = os.lstat(path).st_mode
    except OSError:
        return False
    return stat.S_ISSOCK(mode)
|
|
162
|
+
|
|
163
|
+
|
|
164
|
+
class AnnotationWriter:
|
|
165
|
+
"""A reusable, thread-safe connection to the agent annotation socket.
|
|
166
|
+
|
|
167
|
+
The writer opens the Unix-domain socket once and reuses it for every
|
|
168
|
+
annotation. It is graceful by design: if the socket does not exist
|
|
169
|
+
(the agent is not running with `--annotate`) the writer starts as a
|
|
170
|
+
silent no-op after one log line. It never raises into the caller's
|
|
171
|
+
hot path - a contract violation is the only thing surfaced, and only
|
|
172
|
+
from the explicit `write` call.
|
|
173
|
+
|
|
174
|
+
On a write failure (the agent restarted mid-run, the peer closed) the
|
|
175
|
+
writer makes exactly one bounded reconnect attempt: it closes the
|
|
176
|
+
dead socket, reconnects once, and retries the write. If the reconnect
|
|
177
|
+
or the retried write also fails it goes inert for the rest of the
|
|
178
|
+
run. One attempt only - no retry loop, no backoff, no retry storm in
|
|
179
|
+
a training hot path.
|
|
180
|
+
|
|
181
|
+
Typical use from a framework hook:
|
|
182
|
+
|
|
183
|
+
w = AnnotationWriter() # connects, or becomes a no-op
|
|
184
|
+
w.write({"step": "10"}, pid=os.getpid())
|
|
185
|
+
...
|
|
186
|
+
w.close()
|
|
187
|
+
"""
|
|
188
|
+
|
|
189
|
+
def __init__(self, socket_path: str | None = None,
|
|
190
|
+
connect_timeout: float = 2.0):
|
|
191
|
+
self._path = socket_path or default_socket_path()
|
|
192
|
+
self._timeout = connect_timeout
|
|
193
|
+
self._lock = threading.Lock()
|
|
194
|
+
self._sock: socket.socket | None = None
|
|
195
|
+
self._active = False
|
|
196
|
+
self._logged_unavailable = False
|
|
197
|
+
self._connect()
|
|
198
|
+
|
|
199
|
+
@property
|
|
200
|
+
def active(self) -> bool:
|
|
201
|
+
"""True when the socket is connected and annotations will be sent."""
|
|
202
|
+
return self._active
|
|
203
|
+
|
|
204
|
+
@property
|
|
205
|
+
def socket_path(self) -> str:
|
|
206
|
+
return self._path
|
|
207
|
+
|
|
208
|
+
def _connect(self) -> None:
|
|
209
|
+
try:
|
|
210
|
+
s = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
|
|
211
|
+
s.settimeout(self._timeout)
|
|
212
|
+
s.connect(self._path)
|
|
213
|
+
s.settimeout(None)
|
|
214
|
+
self._sock = s
|
|
215
|
+
self._active = True
|
|
216
|
+
except OSError as exc:
|
|
217
|
+
self._sock = None
|
|
218
|
+
self._active = False
|
|
219
|
+
if not self._logged_unavailable:
|
|
220
|
+
logger.info(
|
|
221
|
+
"ingero annotation socket unavailable at %s (%s); "
|
|
222
|
+
"annotations disabled. Run the agent with "
|
|
223
|
+
"'trace --record --annotate' to enable.",
|
|
224
|
+
self._path, exc,
|
|
225
|
+
)
|
|
226
|
+
self._logged_unavailable = True
|
|
227
|
+
|
|
228
|
+
def write(self, labels: dict, pid: int | None = None,
|
|
229
|
+
ts_ns: int | None = None) -> bool:
|
|
230
|
+
"""Send one annotation. Returns True if it was written to the socket.
|
|
231
|
+
|
|
232
|
+
A contract violation raises AnnotationError - that is a caller
|
|
233
|
+
bug, surfaced loudly. A transport failure (socket gone, peer
|
|
234
|
+
closed) is swallowed: the writer makes one bounded reconnect
|
|
235
|
+
attempt and retries the write; if that also fails it flips to
|
|
236
|
+
no-op and returns False so a training loop is never interrupted
|
|
237
|
+
by agent unavailability.
|
|
238
|
+
"""
|
|
239
|
+
line = encode_annotation(labels, pid=pid, ts_ns=ts_ns)
|
|
240
|
+
with self._lock:
|
|
241
|
+
if not self._active or self._sock is None:
|
|
242
|
+
return False
|
|
243
|
+
try:
|
|
244
|
+
self._sock.sendall(line)
|
|
245
|
+
return True
|
|
246
|
+
except OSError as exc:
|
|
247
|
+
logger.info(
|
|
248
|
+
"ingero annotation socket write failed (%s); "
|
|
249
|
+
"attempting one reconnect.", exc,
|
|
250
|
+
)
|
|
251
|
+
self._close_locked()
|
|
252
|
+
# One bounded reconnect attempt - the agent may have
|
|
253
|
+
# restarted mid-run. No loop, no backoff.
|
|
254
|
+
self._connect()
|
|
255
|
+
if not self._active or self._sock is None:
|
|
256
|
+
logger.info(
|
|
257
|
+
"ingero annotation reconnect failed; annotations "
|
|
258
|
+
"disabled for the rest of this run.",
|
|
259
|
+
)
|
|
260
|
+
return False
|
|
261
|
+
try:
|
|
262
|
+
self._sock.sendall(line)
|
|
263
|
+
return True
|
|
264
|
+
except OSError as exc2:
|
|
265
|
+
logger.info(
|
|
266
|
+
"ingero annotation write failed after reconnect "
|
|
267
|
+
"(%s); annotations disabled for the rest of this "
|
|
268
|
+
"run.", exc2,
|
|
269
|
+
)
|
|
270
|
+
self._close_locked()
|
|
271
|
+
return False
|
|
272
|
+
|
|
273
|
+
def _close_locked(self) -> None:
|
|
274
|
+
if self._sock is not None:
|
|
275
|
+
try:
|
|
276
|
+
self._sock.close()
|
|
277
|
+
except OSError:
|
|
278
|
+
pass
|
|
279
|
+
self._sock = None
|
|
280
|
+
self._active = False
|
|
281
|
+
|
|
282
|
+
def close(self) -> None:
|
|
283
|
+
"""Close the socket. Safe to call more than once."""
|
|
284
|
+
with self._lock:
|
|
285
|
+
self._close_locked()
|
|
286
|
+
|
|
287
|
+
def __enter__(self) -> "AnnotationWriter":
|
|
288
|
+
return self
|
|
289
|
+
|
|
290
|
+
def __exit__(self, *exc) -> None:
|
|
291
|
+
self.close()
|
|
292
|
+
|
|
293
|
+
|
|
294
|
+
def now_ns() -> int:
    """Return the current wall-clock time as integer Unix nanoseconds.

    Convenience for callers that want to pass an explicit annotation
    timestamp.
    """
    stamp = time.time_ns()
    return stamp
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["hatchling"]
|
|
3
|
+
build-backend = "hatchling.build"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "ingero-annotate"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "Writer for the Ingero agent annotation socket. Speaks the agent v0.17+ NDJSON annotation protocol; framework-agnostic."
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
requires-python = ">=3.8"
|
|
11
|
+
license = "Apache-2.0"
|
|
12
|
+
license-files = ["LICENSE"]
|
|
13
|
+
keywords = ["ebpf", "gpu", "observability", "annotation", "ingero"]
|
|
14
|
+
classifiers = [
|
|
15
|
+
"Development Status :: 4 - Beta",
|
|
16
|
+
"Intended Audience :: Developers",
|
|
17
|
+
"Intended Audience :: System Administrators",
|
|
18
|
+
"License :: OSI Approved :: Apache Software License",
|
|
19
|
+
"Operating System :: POSIX :: Linux",
|
|
20
|
+
"Programming Language :: Python :: 3",
|
|
21
|
+
"Programming Language :: Python :: 3 :: Only",
|
|
22
|
+
"Programming Language :: Python :: 3.8",
|
|
23
|
+
"Programming Language :: Python :: 3.9",
|
|
24
|
+
"Programming Language :: Python :: 3.10",
|
|
25
|
+
"Programming Language :: Python :: 3.11",
|
|
26
|
+
"Programming Language :: Python :: 3.12",
|
|
27
|
+
"Topic :: System :: Monitoring",
|
|
28
|
+
]
|
|
29
|
+
|
|
30
|
+
[project.urls]
|
|
31
|
+
Homepage = "https://github.com/ingero-io/ingero"
|
|
32
|
+
Repository = "https://github.com/ingero-io/ingero"
|
|
33
|
+
Documentation = "https://github.com/ingero-io/ingero/blob/main/docs/commands.md"
|
|
34
|
+
Issues = "https://github.com/ingero-io/ingero/issues"
|
|
35
|
+
|
|
36
|
+
[tool.hatch.build.targets.wheel]
|
|
37
|
+
# Single-module package: the importable name is `ingero_annotate`,
|
|
38
|
+
# the file sits at the package root. Hatchling includes top-level
|
|
39
|
+
# `.py` files automatically when `packages` is empty, but listing the
|
|
40
|
+
# module makes the layout intent explicit.
|
|
41
|
+
packages = []
|
|
42
|
+
include = ["ingero_annotate.py"]
|
|
43
|
+
|
|
44
|
+
[tool.hatch.build.targets.sdist]
|
|
45
|
+
include = ["ingero_annotate.py", "README.md", "tests/"]
|
|
@@ -0,0 +1,447 @@
|
|
|
1
|
+
"""End-to-end install-and-use tests for the ingero-annotate package.
|
|
2
|
+
|
|
3
|
+
These tests simulate exactly what a user does on a fresh machine:
|
|
4
|
+
|
|
5
|
+
pip install ingero-annotate
|
|
6
|
+
python -c "from ingero_annotate import AnnotationWriter; ..."
|
|
7
|
+
|
|
8
|
+
with the agent's annotation socket on the other end. The tests run
|
|
9
|
+
`pip install` (not `python -m build` + extract; pip is the actual
|
|
10
|
+
distribution channel) against the source tree, then spawn a fresh
|
|
11
|
+
Python subprocess that:
|
|
12
|
+
|
|
13
|
+
1. Imports `ingero_annotate` FROM THE INSTALLED LOCATION (the
|
|
14
|
+
source tree is intentionally NOT on sys.path; if the wrong copy
|
|
15
|
+
is imported, the assertion on `__file__` catches it).
|
|
16
|
+
2. Stands up a Unix-domain socket server in a background thread.
|
|
17
|
+
3. Uses the public API (`AnnotationWriter`, `encode_annotation`,
|
|
18
|
+
`validate_labels`) to write annotations.
|
|
19
|
+
4. Verifies the wire payload reached the server with the right
|
|
20
|
+
NDJSON shape.
|
|
21
|
+
|
|
22
|
+
The test catches every category of release-day "doesn't work on the
|
|
23
|
+
first try" failure:
|
|
24
|
+
|
|
25
|
+
- `pyproject.toml` is malformed (`pip install` would refuse).
|
|
26
|
+
- Hatchling wheel target misses the module file (`pip install`
|
|
27
|
+
succeeds but import fails).
|
|
28
|
+
- Public symbols are renamed or removed (subprocess import fails).
|
|
29
|
+
- The wire payload regresses (subprocess assertion fails).
|
|
30
|
+
|
|
31
|
+
A maintainer who ships a v0.1.1 (or any future release) and breaks
|
|
32
|
+
ANY of those will see this test fail in CI long before PyPI.
|
|
33
|
+
"""
|
|
34
|
+
|
|
35
|
+
from __future__ import annotations
|
|
36
|
+
|
|
37
|
+
import json
|
|
38
|
+
import os
|
|
39
|
+
import shutil
|
|
40
|
+
import subprocess
|
|
41
|
+
import sys
|
|
42
|
+
import textwrap
|
|
43
|
+
from pathlib import Path
|
|
44
|
+
|
|
45
|
+
import pytest
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
# python/ingero-annotate/tests/this_file -> parents[3] (the fourth ancestor) is the repo root.
|
|
49
|
+
REPO_ROOT = Path(__file__).resolve().parents[3]
|
|
50
|
+
PKG_DIR = REPO_ROOT / "python" / "ingero-annotate"
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def _have_pip() -> bool:
|
|
54
|
+
return shutil.which("pip") is not None or shutil.which("pip3") is not None
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
# The driver script is run in a SEPARATE Python subprocess against the
|
|
58
|
+
# installed-target directory. It must not import anything from the
|
|
59
|
+
# source tree, so it self-contains everything (no helpers from this
|
|
60
|
+
# test file's scope are accessible).
|
|
61
|
+
DRIVER = textwrap.dedent(
|
|
62
|
+
"""
|
|
63
|
+
import json
|
|
64
|
+
import os
|
|
65
|
+
import socket
|
|
66
|
+
import sys
|
|
67
|
+
import threading
|
|
68
|
+
import time
|
|
69
|
+
|
|
70
|
+
install_dir, sock_path = sys.argv[1], sys.argv[2]
|
|
71
|
+
|
|
72
|
+
# Force imports to come from the installed-target dir ONLY.
|
|
73
|
+
# We deliberately do NOT add the source tree to sys.path; the
|
|
74
|
+
# test asserts on __file__ that the loaded module is the
|
|
75
|
+
# installed one.
|
|
76
|
+
sys.path.insert(0, install_dir)
|
|
77
|
+
|
|
78
|
+
from ingero_annotate import (
|
|
79
|
+
AnnotationWriter,
|
|
80
|
+
encode_annotation,
|
|
81
|
+
validate_labels,
|
|
82
|
+
)
|
|
83
|
+
import ingero_annotate as _mod
|
|
84
|
+
|
|
85
|
+
# Print the resolved module path so the test can assert it landed
|
|
86
|
+
# in the install dir, not in the source tree.
|
|
87
|
+
print("MODULE_FILE:", _mod.__file__, flush=True)
|
|
88
|
+
print("DRIVER_PID:", os.getpid(), flush=True)
|
|
89
|
+
|
|
90
|
+
# Exercise the no-socket helpers first - encode_annotation and
|
|
91
|
+
# validate_labels are part of the public API and would surface
|
|
92
|
+
# an import-time crash from a partial-copy wheel.
|
|
93
|
+
validate_labels({"step": "42", "request_id": "req-abc.123"})
|
|
94
|
+
encoded = encode_annotation(
|
|
95
|
+
labels={"step": "42", "request_id": "req-abc.123"},
|
|
96
|
+
pid=os.getpid(),
|
|
97
|
+
ts_ns=1_700_000_000_000_000_000,
|
|
98
|
+
)
|
|
99
|
+
assert isinstance(encoded, (bytes, bytearray, str)), (
|
|
100
|
+
f"encode_annotation returned wrong type: {type(encoded)}"
|
|
101
|
+
)
|
|
102
|
+
print("ENCODE_LEN:", len(encoded), flush=True)
|
|
103
|
+
|
|
104
|
+
# Stand up the UDS server before the writer connects.
|
|
105
|
+
received = []
|
|
106
|
+
ready = threading.Event()
|
|
107
|
+
|
|
108
|
+
def serve():
|
|
109
|
+
srv = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
|
|
110
|
+
srv.bind(sock_path)
|
|
111
|
+
srv.listen(1)
|
|
112
|
+
srv.settimeout(5.0)
|
|
113
|
+
ready.set()
|
|
114
|
+
try:
|
|
115
|
+
conn, _ = srv.accept()
|
|
116
|
+
finally:
|
|
117
|
+
srv.close()
|
|
118
|
+
with conn:
|
|
119
|
+
conn.settimeout(5.0)
|
|
120
|
+
buf = b""
|
|
121
|
+
while len(received) < 1:
|
|
122
|
+
try:
|
|
123
|
+
chunk = conn.recv(4096)
|
|
124
|
+
except socket.timeout:
|
|
125
|
+
break
|
|
126
|
+
if not chunk:
|
|
127
|
+
break
|
|
128
|
+
buf += chunk
|
|
129
|
+
while b"\\n" in buf:
|
|
130
|
+
line, buf = buf.split(b"\\n", 1)
|
|
131
|
+
received.append(line)
|
|
132
|
+
|
|
133
|
+
t = threading.Thread(target=serve, daemon=True)
|
|
134
|
+
t.start()
|
|
135
|
+
if not ready.wait(2.0):
|
|
136
|
+
sys.exit("server failed to bind in 2s")
|
|
137
|
+
|
|
138
|
+
# Public-API smoke: open a writer, send one annotation, observe
|
|
139
|
+
# the wire payload.
|
|
140
|
+
writer = AnnotationWriter(socket_path=sock_path)
|
|
141
|
+
try:
|
|
142
|
+
writer.write({"step": "42", "request_id": "req-abc.123"}, pid=os.getpid())
|
|
143
|
+
finally:
|
|
144
|
+
writer.close()
|
|
145
|
+
|
|
146
|
+
# Wait briefly for the server thread to drain.
|
|
147
|
+
t.join(timeout=3.0)
|
|
148
|
+
if not received:
|
|
149
|
+
sys.exit("no annotation line received within 3s")
|
|
150
|
+
|
|
151
|
+
# Echo the line back so the test can assert on the JSON shape.
|
|
152
|
+
print("LINE:", received[0].decode("utf-8"), flush=True)
|
|
153
|
+
"""
|
|
154
|
+
)
|
|
155
|
+
|
|
156
|
+
|
|
157
|
+
@pytest.mark.skipif(not _have_pip(), reason="pip is required for this E2E test")
def test_pip_install_then_import_and_write(tmp_path: Path) -> None:
    """Walk the whole user journey: pip install, import, write, verify.

    The package is installed with ``pip install --target`` into a scratch
    directory, then the DRIVER script is run in a separate interpreter
    against that directory. The driver reports tagged lines on stdout
    (``MODULE_FILE:``, ``DRIVER_PID:``, ``ENCODE_LEN:``, ``LINE:``) which
    are checked here.
    """
    site_dir = tmp_path / "site"
    site_dir.mkdir()

    # Phase 1: a real `pip install --target` of the package directory, so
    # the build backend produces and installs a wheel. --no-deps keeps the
    # run hermetic.
    install = subprocess.run(
        [
            sys.executable,
            "-m",
            "pip",
            "install",
            "--target",
            str(site_dir),
            "--no-deps",
            "--quiet",
            "--disable-pip-version-check",
            str(PKG_DIR),
        ],
        capture_output=True,
        text=True,
        timeout=180,
    )
    if install.returncode != 0:
        pytest.fail(
            "`pip install` of the package failed. This is exactly the "
            "release-day blocker the test exists to catch.\n"
            f"stdout:\n{install.stdout}\n"
            f"stderr:\n{install.stderr}"
        )

    # Phase 2: the module file must have landed in the target. If the
    # wheel `include` setting in pyproject.toml drifts, this fires.
    if not (site_dir / "ingero_annotate.py").is_file():
        contents = sorted(entry.name for entry in site_dir.iterdir())
        pytest.fail(
            "pip install succeeded but ingero_annotate.py is missing "
            f"from {site_dir}. Contents: {contents}"
        )

    # Phase 3: a dist-info directory shows the install went through the
    # wheel pipeline rather than a plain file copy.
    assert list(site_dir.glob("ingero_annotate-*.dist-info")), (
        f"no ingero_annotate-*.dist-info in {site_dir}; "
        "the install did not go through the wheel pipeline"
    )

    # Phase 4: run the driver in its own interpreter. PYTHONPATH is
    # removed from the inherited environment so the source tree cannot
    # shadow the installed copy; everything else (PATH etc.) is kept and
    # the install dir is handed over via argv.
    sock_path = str(tmp_path / "annotate.sock")
    driver_file = tmp_path / "driver.py"
    driver_file.write_text(DRIVER)

    env = dict(os.environ)
    env.pop("PYTHONPATH", None)

    proc = subprocess.run(
        [sys.executable, str(driver_file), str(site_dir), sock_path],
        capture_output=True,
        text=True,
        timeout=30,
        env=env,
    )
    if proc.returncode != 0:
        pytest.fail(
            "driver subprocess failed (user-flow simulation broke).\n"
            f"argv: install_dir={site_dir} sock={sock_path}\n"
            f"stdout:\n{proc.stdout}\n"
            f"stderr:\n{proc.stderr}"
        )

    # Phase 5: check what the driver reported.
    reported = proc.stdout.splitlines()

    def tagged(prefix: str) -> list:
        # All stdout lines that start with the given tag.
        return [entry for entry in reported if entry.startswith(prefix)]

    def value_of(entry: str) -> str:
        # Text after the first colon, with surrounding whitespace removed.
        return entry.split(":", 1)[1].strip()

    # 5a: the imported module must be the installed copy; otherwise the
    # install step proved nothing.
    module_lines = tagged("MODULE_FILE:")
    assert module_lines, f"driver did not print MODULE_FILE: {proc.stdout!r}"
    loaded_path = value_of(module_lines[0])
    assert str(site_dir) in loaded_path, (
        f"driver imported the WRONG ingero_annotate module: "
        f"loaded {loaded_path!r}, expected one under {site_dir!r}"
    )

    # 5b: encode_annotation produced a non-empty payload.
    encode_lines = tagged("ENCODE_LEN:")
    assert encode_lines, "driver did not print ENCODE_LEN"
    assert int(value_of(encode_lines[0])) > 0

    # 5c: an NDJSON line crossed the socket; parse it and check its fields.
    line_lines = tagged("LINE:")
    assert line_lines, f"driver did not print LINE: {proc.stdout!r}"
    payload = json.loads(value_of(line_lines[0]))
    assert "labels" in payload, f"missing labels: {payload}"
    assert payload["labels"]["step"] == "42"
    assert payload["labels"]["request_id"] == "req-abc.123"

    # The pid on the wire must be the driver's pid (the writer ran inside
    # the subprocess), not the pid of this test process.
    driver_pid_lines = tagged("DRIVER_PID:")
    assert driver_pid_lines, "driver did not print DRIVER_PID"
    driver_pid = int(value_of(driver_pid_lines[0]))
    assert payload.get("pid") == driver_pid, (
        f"wire pid {payload.get('pid')} != driver pid {driver_pid}"
    )
|
|
277
|
+
|
|
278
|
+
|
|
279
|
+
@pytest.mark.skipif(not _have_pip(), reason="pip is required for this E2E test")
def test_pip_install_from_sdist(tmp_path: Path) -> None:
    """Build an sdist, install it with pip, and import the result.

    Distributors and corporate gates sometimes prefer the sdist, and pip
    falls back to it when no wheel matches. If the source distribution
    is broken (for example files missing from
    `tool.hatch.build.targets.sdist.include`), the wheel test still
    passes but a user on the sdist path is stuck. This test pins that
    path.

    Skipped when either `build` or `hatchling` is not importable in the
    current environment.
    """
    # Both modules are needed in THIS environment: the build below runs
    # with --no-isolation, so the backend is not installed on demand.
    # importorskip already imports the module; no second import is needed.
    pytest.importorskip("build", reason="`build` not installed; skip sdist E2E")
    pytest.importorskip(
        "hatchling",
        reason="hatchling not importable; cannot run --no-isolation build",
    )

    dist_dir = tmp_path / "dist"
    dist_dir.mkdir()

    # Build the sdist. `--no-isolation` reuses the current Python
    # environment so the test does not require python3-venv on the host.
    result = subprocess.run(
        [
            sys.executable,
            "-m",
            "build",
            "--sdist",
            "--no-isolation",
            "--outdir",
            str(dist_dir),
            str(PKG_DIR),
        ],
        capture_output=True,
        text=True,
        timeout=180,
    )
    if result.returncode != 0:
        pytest.fail(
            f"`python -m build --sdist --no-isolation` failed:\n"
            f"stdout:\n{result.stdout}\n"
            f"stderr:\n{result.stderr}"
        )

    # Accept both the normalized (underscore) and the raw (hyphen) name.
    sdists = list(dist_dir.glob("ingero_annotate-*.tar.gz")) + list(
        dist_dir.glob("ingero-annotate-*.tar.gz")
    )
    assert sdists, f"no sdist produced in {dist_dir}; files: {list(dist_dir.iterdir())}"

    # Install from the sdist into a clean target.
    install_dir = tmp_path / "site"
    install_dir.mkdir()
    result = subprocess.run(
        [
            sys.executable,
            "-m",
            "pip",
            "install",
            "--target",
            str(install_dir),
            "--no-deps",
            "--quiet",
            "--disable-pip-version-check",
            str(sdists[0]),
        ],
        capture_output=True,
        text=True,
        timeout=180,
    )
    if result.returncode != 0:
        pytest.fail(
            f"`pip install <sdist>` failed:\n"
            f"stdout:\n{result.stdout}\n"
            f"stderr:\n{result.stderr}"
        )

    assert (install_dir / "ingero_annotate.py").is_file(), (
        "sdist install did not deposit ingero_annotate.py"
    )

    # Quick import smoke from the installed location. PYTHONPATH is
    # dropped so only the install dir (passed via argv) can supply the
    # module.
    env = {k: v for k, v in os.environ.items() if k != "PYTHONPATH"}
    smoke = subprocess.run(
        [
            sys.executable,
            "-c",
            "import sys; sys.path.insert(0, sys.argv[1]); "
            "from ingero_annotate import AnnotationWriter, encode_annotation; "
            "import ingero_annotate; print(ingero_annotate.__file__)",
            str(install_dir),
        ],
        capture_output=True,
        text=True,
        timeout=15,
        env=env,
    )
    if smoke.returncode != 0:
        pytest.fail(
            f"sdist-installed import smoke failed:\n"
            f"stdout:\n{smoke.stdout}\n"
            f"stderr:\n{smoke.stderr}"
        )
    assert str(install_dir) in smoke.stdout, (
        f"sdist-installed module loaded from wrong path: {smoke.stdout!r}"
    )
|
|
388
|
+
|
|
389
|
+
|
|
390
|
+
@pytest.mark.skipif(not _have_pip(), reason="pip is required for this E2E test")
def test_pip_install_metadata_is_well_formed(tmp_path: Path) -> None:
    """The installed METADATA must carry the essential header fields.

    A malformed `pyproject.toml` (wrong classifier syntax, missing
    required field, license declared without license-files) can surface
    only at upload time. This test performs a real
    `pip install --target` of the package directory and then inspects
    the header block of the generated `*.dist-info/METADATA` file.
    """
    install_dir = tmp_path / "site"
    install_dir.mkdir()

    result = subprocess.run(
        [
            sys.executable,
            "-m",
            "pip",
            "install",
            "--target",
            str(install_dir),
            "--no-deps",
            "--quiet",
            "--disable-pip-version-check",
            str(PKG_DIR),
        ],
        capture_output=True,
        text=True,
        timeout=180,
    )
    if result.returncode != 0:
        pytest.fail(
            "metadata install failed - pyproject.toml is likely malformed:\n"
            f"stdout:\n{result.stdout}\n"
            f"stderr:\n{result.stderr}"
        )

    # The dist-info dir holds the metadata that installers read.
    dist_info = list(install_dir.glob("ingero_annotate-*.dist-info"))
    assert dist_info, f"no dist-info in {install_dir}"
    metadata = (dist_info[0] / "METADATA").read_text(encoding="utf-8")

    # Only the header block counts: it ends at the first blank line, and
    # the long description (README) follows it. Matching must be anchored
    # to the start of a header line - a plain substring test for
    # "Version: " is always satisfied by "Metadata-Version: ", and
    # "Summary: " could be satisfied by README text.
    header_lines = metadata.split("\n\n", 1)[0].splitlines()

    # Field names are matched case-sensitively, exactly as written.
    required_lines = [
        "Metadata-Version:",
        "Name: ingero-annotate",
        "Version: ",
        "Summary: ",
        "Requires-Python:",
    ]
    for needle in required_lines:
        assert any(line.startswith(needle) for line in header_lines), (
            f"METADATA missing required field {needle!r}; "
            "PyPI upload will reject or warn"
        )
|
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
"""Regression test: the canonical ingero_annotate.py lives ONLY in
|
|
2
|
+
python/ingero-annotate/. The 5 vendored copies (md5 2aed7ca2)
|
|
3
|
+
previously sat inside each adapter directory and drifted invisibly;
|
|
4
|
+
v0.19 Phase E.2 consolidated them, and this test pins the
|
|
5
|
+
consolidation so a future contributor cannot silently re-vendor.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
import hashlib
|
|
11
|
+
import os
|
|
12
|
+
from pathlib import Path
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
# Walk up from this test file to the repository root. The package
|
|
16
|
+
# layout is `<repo>/python/ingero-annotate/tests/this_file`, so two
|
|
17
|
+
# `parent` steps land at the package root and a third lands at
|
|
18
|
+
# `<repo>/python/`. We want `<repo>/`.
|
|
19
|
+
REPO_ROOT = Path(__file__).resolve().parents[3]
|
|
20
|
+
CANONICAL = REPO_ROOT / "python" / "ingero-annotate" / "ingero_annotate.py"
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def test_canonical_module_exists() -> None:
    """The canonical ingero_annotate.py must be present as a regular file."""
    present = CANONICAL.is_file()
    assert present, f"canonical ingero_annotate.py missing at {CANONICAL}"
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def test_no_vendored_copies_in_adapter_dirs() -> None:
    """Fail if any adapter directory carries its own ingero_annotate.py.

    Every immediate sub-directory of ``<repo>/examples/integrations`` is
    checked for a file named ``ingero_annotate.py``. Adapters used to
    vendor identical copies of the module; those were consolidated into
    the single canonical package, and this test pins that deletion.

    When the integrations directory is absent (some packaging layouts do
    not ship it) nothing is checked and the test passes.
    """
    integrations = REPO_ROOT / "examples" / "integrations"
    if integrations.is_dir():
        # Collect the repo-relative path of every vendored copy found.
        offending = [
            str((adapter / "ingero_annotate.py").relative_to(REPO_ROOT))
            for adapter in integrations.iterdir()
            if adapter.is_dir() and (adapter / "ingero_annotate.py").is_file()
        ]
        assert not offending, (
            "vendored ingero_annotate.py copies still present in:\n  "
            + "\n  ".join(offending)
            + "\nDelete them and import from the ingero-annotate package."
        )
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def test_canonical_module_is_well_formed() -> None:
    """The canonical module has real content and its key definitions.

    Guards against a packaging mistake (empty file, partial copy) that
    would leave the suite passing with no actual code. The check is
    textual: the file is read, never imported.
    """
    source = CANONICAL.read_text(encoding="utf-8")
    size = len(source)
    assert size > 1000, f"canonical module is suspiciously small: {size} bytes"

    # The public class plus two protocol helpers: enough to catch a
    # broken build without pinning every internal symbol.
    required = (
        "class AnnotationWriter",
        "def encode_annotation",
        "def validate_labels",
    )
    for sym in required:
        assert sym in source, f"canonical module is missing: {sym}"
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
def test_canonical_module_md5_stable() -> None:
    """Pin the md5 of the canonical module's bytes.

    An intended edit to the module must update the pinned value on
    purpose; any unintended drift fails here. The pinned digest is the
    one the former vendored copies shared, recorded when they were
    consolidated into the canonical module.
    """
    # Recorded at consolidation time (v0.19 Phase E.2, 2026-05-24).
    expected = "2aed7ca2fc98cfbdfcd1611b7d86a03e"

    digest = hashlib.md5()
    digest.update(CANONICAL.read_bytes())
    h = digest.hexdigest()

    assert h == expected, (
        f"canonical module md5 drifted: got {h}, expected {expected}. "
        "Update this pin INTENTIONALLY when you change the module."
    )
|