@jrpool/kilotest 24.0.8 → 24.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -59,13 +59,6 @@
59
59
  color: #555;
60
60
  font-size: 0.9rem;
61
61
  }
62
- .tutorial-img {
63
- display: block;
64
- max-width: 100%;
65
- border: solid 1px #ccc;
66
- border-radius: 0.25rem;
67
- margin: 1rem 0;
68
- }
69
62
  .knowledge-check {
70
63
  margin-top: 1.5rem;
71
64
  padding: 1rem;
@@ -88,18 +81,18 @@
88
81
  .knowledge-check label {
89
82
  cursor: pointer;
90
83
  }
91
- .quiz-feedback {
84
+ .quiz-diagnosis {
92
85
  margin-top: 0.75rem;
93
86
  padding: 0.5rem 0.75rem;
94
87
  border-radius: 0.25rem;
95
88
  min-height: 1.5rem;
96
89
  }
97
- .quiz-feedback.correct {
90
+ .quiz-diagnosis.correct {
98
91
  background: #e6f4ea;
99
92
  border-left: solid 4px #2d7a2d;
100
93
  color: #1a4a1a;
101
94
  }
102
- .quiz-feedback.incorrect {
95
+ .quiz-diagnosis.incorrect {
103
96
  background: #fdecea;
104
97
  border-left: solid 4px #c0392b;
105
98
  color: #5a1a1a;
@@ -130,18 +123,18 @@
130
123
  color: #c0392b;
131
124
  font-weight: 700;
132
125
  }
133
- .comment-feedback {
126
+ .comment-diagnosis {
134
127
  margin-top: 0.75rem;
135
128
  padding: 0.5rem 0.75rem;
136
129
  border-radius: 0.25rem;
137
130
  min-height: 1.5rem;
138
131
  }
139
- .comment-feedback.success {
132
+ .comment-diagnosis.success {
140
133
  background: #e6f4ea;
141
134
  border-left: solid 4px #2d7a2d;
142
135
  color: #1a4a1a;
143
136
  }
144
- .comment-feedback.error {
137
+ .comment-diagnosis.error {
145
138
  background: #fdecea;
146
139
  border-left: solid 4px #c0392b;
147
140
  color: #5a1a1a;
@@ -155,7 +148,7 @@
155
148
  .skip-link,
156
149
  .tutorial-nav,
157
150
  #comment-form,
158
- #comment-feedback,
151
+ #comment-diagnosis,
159
152
  #lesson1-quiz button { display: none; }
160
153
 
161
154
  /* ── Page breaks: start each h2 section, h3 section, and h4 subsection on a new page ── */
@@ -186,53 +179,51 @@
186
179
  </head>
187
180
  <body>
188
181
  <main>
189
- <h1><a href="/">Kilotest</a> tutorial: Accessibility Testing Strategies</h1>
182
+ <h1><a href="/">Kilotest</a> tutorial: Accessibility testing strategies</h1>
190
183
  <nav class="contents-table">
191
184
  <h2>Table of contents</h2>
192
185
  <ul class="nav">
193
186
  <li><a href="#about">About this tutorial</a></li>
194
187
  <li><a href="#orientation">Part 1: Orientation</a>
195
188
  <ul>
196
- <li><a href="#why-matters">Why accessibility testing matters</a></li>
197
- <li><a href="#three-strategies">The three testing strategies</a></li>
198
- <li><a href="#choosing">Choosing among strategies</a></li>
199
- <li><a href="#case-study-intro">Introducing the case study</a></li>
189
+ <li><a href="#basics">Accessibility basics</a></li>
190
+ <li><a href="#impacts">Impacts</a></li>
191
+ <li><a href="#testing">Testing</a></li>
192
+ <li><a href="#orchestrating">Orchestrating testing strategies</a></li>
200
193
  </ul>
201
194
  </li>
202
195
  <li><a href="#practicum">Part 2: Practicum</a>
203
196
  <ul>
204
- <li><a href="#example-page">The example page</a></li>
205
- <li><a href="#human-testing">Human testing</a></li>
206
- <li><a href="#rule-engine-testing">Rule-engine testing</a></li>
207
- <li><a href="#ai-testing">AI-agent testing</a></li>
208
- <li><a href="#comparison">Comparing the strategies</a></li>
197
+ <li><a href="#issue">The issue</a></li>
198
+ <li><a href="#page">The page</a></li>
199
+ <li><a href="#testing-page">Testing the page</a></li>
209
200
  </ul>
210
201
  </li>
211
- <li><a href="#conclusion">Conclusion</a></li>
202
+ <li><a href="#knowledge-check">Knowledge check</a></li>
212
203
  <li><a href="#further-reading">Further reading</a></li>
213
204
  <li><a href="#suggest-improvement">Suggest improvements</a></li>
214
205
  </ul>
215
206
  </nav>
216
207
  <h2>Notice</h2>
217
- <p>This is a rough draft created in partnership with an artificial intelligence agent using Claude Sonnet 4.6 Thinking. Revisions are in progress.</p>
208
+ <p>This is a draft created in partnership with an artificial intelligence agent using Claude Sonnet 4.6 Thinking. Revisions are in progress.</p>
218
209
  <section id="about" class="main">
219
210
  <h2>About this tutorial</h2>
220
- <p>If you are familiar with web technology (HTML, CSS, JavaScript), but you have limited knowledge of digital accessibility testing, this tutorial is designed for you.</p>
211
+ <p>If you are familiar with front-end web technology (HTML, CSS, JavaScript), but you have limited knowledge of digital accessibility testing, this tutorial is designed for you.</p>
221
212
  <p>Estimated completion time: 1 hour.</p>
222
213
  <p>The tutorial has two parts:</p>
223
214
  <ul>
224
215
  <li><strong>Orientation</strong>. What digital accessibility is, why it is valuable, and how websites can be tested for it.</li>
225
216
  <li><strong>Practicum</strong>. Examining a specific example.</li>
226
217
  </ul>
227
- <p>If the tutorial makes you want to learn more, there are links to additional information at the end.</p>
228
- <p>Improvements in this tutorial are certainly possible. You can help by submitting your suggestions at the end.</p>
218
+ <p>There are links to additional information at the end.</p>
219
+ <p>You can help improve this tutorial by submitting your suggestions at the end.</p>
229
220
  </section>
230
221
  <section id="orientation" class="main">
231
222
  <h2>Part 1: Orientation</h2>
232
- <section id="why-matters">
233
- <h3>Accessibility in a nutshell</h3>
234
- <p><q>Accessibility</q> has several meanings. Here it refers to universal usability: usability for the widest practical range of people. It began with a focus on people with specific physical, sensory, and cognitive disabilities (an estimated 20% of the world population), but now it often refers to design and construction <em>standards</em> that elevate usability for everybody.</p>
235
- <p>Accessibility standards exist for buildings, vehicles, devices, and software. Standards are defined by professional associations, governments, and individual experts. The latter are often called <q>best practices</q>.</p>
223
+ <section id="basics">
224
+ <h3>Accessibility basics</h3>
225
+ <p><q>Accessibility</q> has several meanings. Here it refers to universal usability: usability for the widest practical range of people. It often aims to serve people with specific physical, sensory, and cognitive disabilities (an estimated 20% of the world population), but accessibility typically elevates usability for everybody.</p>
226
+ <p>Accessibility standards exist for streets, buildings, vehicles, devices, and software. Standards are defined by professional associations, governments, and individual experts. The latter standards are often called <q>best practices</q>.</p>
236
227
  <p>Kilotest deals with <strong>web</strong> accessibility, so that is the focus of this tutorial.</p>
237
228
  <p>Not everybody agrees with every standard, but some standards are widely accepted and have even been made legally mandatory. The web accessibility standards gaining the greatest legitimacy are those defined by the World Wide Web Consortium (W3C). They include:</p>
238
229
  <ul>
@@ -240,375 +231,203 @@
240
231
  <li>Authoring Tool Accessibility Guidelines (ATAG): for web authoring tools</li>
241
232
  <li>Accessible Rich Internet Applications (ARIA): for scripted web applications</li>
242
233
  </ul>
243
- <h3>The stakes</h3>
244
- <p>It is obvious that making a website that is insecure or leaks private or confidential data is risky. Similarly, making a website that is inaccessible is risky. It exposes the site owner to prosecution, civil litigation, and negative publicity. It also makes websites harder to use, deterring what you want users to do, such as understanding your mission or making purchases.</p>
245
- <p>Many users interact with the web using <em>assistive technologies</em> (ATs): hardware and software tools that mediate between websites and users. Some are designed to serve users with disabilities, such as <em>screen readers</em> that explain structure and content orally, voice input software, eye trackers, breath-based navigators. ATs typically presume that a site conforms to accessibility standards.</p>
246
- <p>Increasingly, almost all web use is being mediated by artificial-intelligence agents. They all make mistakes, but the error rate is likely to jump when a website violates accessibility standards.</p>
247
- <p>The accessibility standards go beyond HTML, CSS, and JavaScript standards, so accessibility gets conformity to those standards as a side effect. This makes your code more maintainable and debugging easier as team members change.</p>
248
234
  </section>
249
- <section id="three-strategies">
235
+ <section id="impacts">
236
+ <h3>Impacts</h3>
237
+ <p>Obviously, websites that are insecure or leak private or confidential data are risky. Similarly, inaccessible websites are risky. They expose site owners to prosecution, civil litigation, and negative publicity. Inaccessible websites are also harder to use, deterring what you want users to do, such as understanding your mission or making purchases.</p>
238
+ <p>Many users interact with the web using <dfn>assistive technologies</dfn> (<abbr title="assistive technologies">ATs</abbr>): hardware and software tools that mediate between websites and users. Some are designed to serve users with disabilities, such as <em>screen readers</em> that explain structure and content aurally, voice input software, eye trackers, and breath-based navigators. ATs typically rely on website accessibility, so AT users may be unable to navigate or operate inaccessible sites. Do you want to see an example of this? Then pause for a minute and view a <a href="https://www.youtube.com/watch?v=Hp8dAkHQ9O0">video about browsing while blind</a>.</p>
239
+ <p>Increasingly, web use is being mediated by artificial-intelligence agents. They all make mistakes, but the error rate is likely to jump when a website violates accessibility standards.</p>
240
+ <p>Accessibility standards include but exceed HTML, CSS, and JavaScript standards, so if you achieve accessibility you get conformity to those standards as a side effect. This makes your code more maintainable and debugging easier as team members change.</p>
241
+ </section>
242
+ <section id="testing">
250
243
  <h3>Testing</h3>
251
244
  <p>Accessibility <em>testing</em> is the process of verifying conformity to accessibility standards. There are three main strategies of web accessibility testing.</p>
252
- <h4>Strategy 1. Human testing</h4>
245
+ <h4>Strategy 1. Testing by humans</h4>
253
246
  <p>A person directly examines a website, using browsers, developer tools, I/O devices, and assistive technologies to find accessibility problems. This is sometimes called <q>manual testing</q>, but it really is <em>human</em> testing: testing by a person who investigates the site by inspecting and using it.</p>
254
247
  <p>One subtype of human testing is testing by experts in the use of assistive technologies and atypical I/O methods. These are usually people who have gained their expertise through long-term use arising from disabilities.</p>
255
- <p>Human testing gives you insight into accessibility problems that the other strategies are likely to miss. But it is also the slowest and most expensive strategy. So an efficient approach is usually to start with the other strategies, solve any problems they discover, and then do human testing on the improved site.</p>
256
- <h4>Strategy 2. Rule-engine testing</h4>
257
- <p>Software applies a set of codified rules to a page and reports on any violations of the rules. This is sometimes called <q>automated testing</q>, but it is humans who have defined the rules and designed the tests (sometimes with AI assistance).</p>
248
+ <p>Human testing gives you insight into accessibility problems that other strategies are likely to miss. It is also the slowest and most expensive strategy, typically requiring hours per page.</p>
249
+ <h4>Strategy 2. Testing by rule engines</h4>
250
+ <p>Software applies a set of codified rules and reports on any violations of the rules. This is sometimes called <q>automated testing</q>.</p>
258
251
  <p>Rule engines come in several forms:</p>
259
252
  <ul>
260
- <li><strong>Browser extensions</strong> such as the <a href="https://www.deque.com/axe/devtools/">axe DevTools extension</a> and the <a href="https://wave.webaim.org/extension/">WAVE Evaluation Tool</a> run directly in the browser and report violations interactively.</li>
261
- <li><strong>Installed engines</strong> such as <a href="https://www.npmjs.com/package/pa11y">Pa11y</a> and <a href="https://www.npmjs.com/package/testaro">Testaro</a> run from the command line and can be integrated into development workflows.</li>
262
- <li><strong>API services</strong> accept page URLs and return analysis results over the network.</li>
263
- <li><strong>Ensemble services</strong> such as <a href="/">Kilotest</a> run multiple rule engines against a page and consolidate the results. Because different engines encode different rules, an ensemble typically finds more issues than any single engine.</li>
253
+ <li><strong>Browser extensions</strong> run directly in the browser and report violations interactively.</li>
254
+ <li><strong>Installed engines</strong> run from the command line and can be integrated into development workflows.</li>
255
+ <li><strong>API services</strong> accept page URLs or HTML content and return analysis results over the network.</li>
256
+ <li><strong>Ensemble services</strong> run multiple rule engines against a page and consolidate the results.</li>
264
257
  </ul>
265
- <p>Rule-engine testing is fast a full scan typically takes 1–5 minutes. It requires little accessibility knowledge from the user, since the rules encode the expertise. It can be run repeatedly, integrated into CI/CD pipelines, and applied to large numbers of pages.</p>
266
-
267
- <h4>AI-agent testing</h4>
268
- <p>An <strong>AI agent</strong>, as used in this tutorial, is a language model — software trained on large amounts of text to understand and generate language — configured with instructions to perform a specific task. The underlying <strong>model</strong> (such as Claude, GPT-4, or Gemini) is the system you access; the <strong>agent</strong> is the model combined with its instructions and the content you provide. For accessibility testing, the agent is given page content and asked to identify violations. This strategy is the newest of the three and is evolving rapidly.</p>
269
- <p>The workflow involves three choices:</p>
258
+ <p>Rule-engine testing is fast and often cheap. Testing one web page typically takes no more than five minutes. It requires little accessibility knowledge from the user, since the rules encode the expertise. It can be run repeatedly, integrated into CI/CD pipelines, and applied to many pages.</p>
259
+ <h4>Strategy 3. Testing by AI agents</h4>
260
+ <p>Artificial intelligence (<abbr title="Artificial Intelligence">AI</abbr>) has been developing since the mid-1950s and by now has produced systems that can mimic human expert behavior. So it is not preposterous that you could give the URL of a web page to an AI agent and say, <q>Find accessibility defects on that page and report them to me.</q> What happens if you try that?</p>
261
+ <p>If you ask a mainstream AI agent, powered by a foundation model, to navigate to a web page and look for accessibility defects in it, the response may look right, but it may be:</p>
270
262
  <ul>
271
- <li><strong>Selecting a model.</strong> Different models vary in their knowledge of accessibility standards, their ability to reason about HTML structure, and their tendency to hallucinate. As of 2026, capable models include Claude (Anthropic), GPT-4 (OpenAI), and Gemini (Google).</li>
272
- <li><strong>Designing instructions.</strong> The instructions — often called a <q>prompt</q> — specify what to test, what standards to apply, and how to report findings. Prompt quality significantly affects the quality of results.</li>
273
- <li><strong>Providing page content.</strong> The AI agent must receive the relevant page content. This may be the raw HTML source, the rendered DOM, screenshots, or some combination. What you provide determines what the agent can find.</li>
263
+ <li>incomplete</li>
264
+ <li>incorrect</li>
265
+ <li>fabricated</li>
274
266
  </ul>
275
- <p>AI-agent testing can reason about issues that resist mechanical rules for example, whether image alternative text is meaningful in context, or whether a form label is genuinely helpful. Its output is available within minutes and requires no specialist tools beyond access to an AI agent. However, AI agents can produce false findings and can miss real ones, and their results may change as the underlying models are updated.</p>
276
- </section>
277
-
278
- <!-- ─── ─── ─── ─── -->
279
- <section id="choosing">
280
- <h3>Choosing among the strategies</h3>
281
- <p class="time-estimate">About 12 minutes.</p>
282
-
283
- <p>The three strategies are not alternatives — they are complements. A mature accessibility testing practice uses all three. Understanding their differences helps you decide how to allocate effort.</p>
284
-
285
- <h4>Time to results</h4>
286
- <dl class="compare">
287
- <dt>Human testing</dt>
288
- <dd>Slowest. A thorough examination of a single page takes 30 minutes to several hours, depending on the page's complexity and the scope of testing.</dd>
289
- <dt>Rule-engine testing</dt>
290
- <dd>Fastest. A full-page scan typically completes in 1–5 minutes. An ensemble service such as Kilotest may take somewhat longer.</dd>
291
- <dt>AI-agent testing</dt>
292
- <dd>Moderate. Depending on the scope of instructions, the amount of page content provided, and the AI agent's processing speed, 2–10 minutes is typical for focused testing.</dd>
293
- </dl>
294
-
295
- <h4>Financial cost</h4>
296
- <dl class="compare">
297
- <dt>Human testing</dt>
298
- <dd>The cost of a skilled tester's time. Specialist accessibility consultants are expensive. Internal developers trained in accessibility are a recurring investment.</dd>
299
- <dt>Rule-engine testing</dt>
300
- <dd>Many rule engines are free and open-source. Commercial tools and API services typically charge per page or per month. Kilotest is currently free. Costs are generally low relative to the number of tests run.</dd>
301
- <dt>AI-agent testing</dt>
302
- <dd>Most AI services charge per token (unit of text) processed. As of 2026, asking an AI agent to check a single page against one WCAG success criterion typically costs between $0.01 and $0.10 depending on the model used. Prices are falling over time.</dd>
303
- </dl>
304
-
305
- <h4>What can each strategy find?</h4>
306
- <dl class="compare">
307
- <dt>Human testing</dt>
308
- <dd>The broadest potential coverage. A skilled human can find any issue that is observable through the browser, developer tools, or a screen reader. Human testing is the only strategy that can reliably assess whether content is usable — not merely technically compliant — for users with disabilities. It is also the only strategy that can catch issues that only appear during interaction (filling out a multi-step form, for instance) or that depend on context (whether an image description is accurate given the surrounding text).
309
- </dd>
310
- <dt>Rule-engine testing</dt>
311
- <dd>Limited to issues for which explicit rules have been written. Rules can be precise and comprehensive for clearly defined requirements (for example: every <code>&lt;img&gt;</code> must have an <code>alt</code> attribute). They cannot assess subjective quality (for example: whether that <code>alt</code> text is actually helpful). Some issues are detectable in principle but not yet covered by available engines.</dd>
312
- <dt>AI-agent testing</dt>
313
- <dd>Broader than rule engines for issues requiring reasoning, narrower than humans for issues requiring interaction or lived experience. An AI agent can assess whether image alternative text is plausibly meaningful, whether instructions are clear, or whether form labels are adequate. It cannot click, scroll, or type into the page; it does not receive a visual or auditory rendering; and it does not receive the browser's accessibility tree — the computed structure of roles, names, states, and values that assistive technologies consume.</dd>
314
- </dl>
315
-
316
- <h4>False positives (incorrect findings)</h4>
317
- <dl class="compare">
318
- <dt>Human testing</dt>
319
- <dd>Low, in the hands of a knowledgeable tester. A human who understands the standards can distinguish genuine violations from compliant edge cases. Inexperienced testers produce more false positives.</dd>
320
- <dt>Rule-engine testing</dt>
321
- <dd>Moderate and tool-dependent. Rules that cannot fully determine compliance from the code alone (for example: whether an image needs alternative text) may produce incorrect results. Different engines disagree about the same page, and some findings require human review to confirm.</dd>
322
- <dt>AI-agent testing</dt>
323
- <dd>Variable and model-dependent. Current models sometimes misidentify compliant patterns as violations, particularly when their knowledge of applicable standards is incomplete or when the HTML structure is complex. Critical review of AI output is essential.</dd>
324
- </dl>
325
-
326
- <h4>False negatives (missed defects)</h4>
327
- <dl class="compare">
328
- <dt>Human testing</dt>
329
- <dd>Low for experienced testers working systematically, but not zero. Testers can overlook attributes on elements they do not specifically inspect. Issues that only manifest in edge cases or under specific conditions may be missed if those cases are not tested. Consistency across large page sets can be difficult to maintain.</dd>
330
- <dt>Rule-engine testing</dt>
331
- <dd>Any issue not covered by a rule will be missed. No current rule engine covers all WCAG success criteria. Engines also differ in which rules they implement: a single engine may miss issues that another engine catches. Ensemble testing reduces the false negative rate relative to any single engine.</dd>
332
- <dt>AI-agent testing</dt>
333
- <dd>Depends heavily on what content is provided to the AI agent. If the agent receives only the raw HTML source of a page whose forms are rendered by JavaScript, it will miss all form-related issues entirely. If the agent's knowledge of valid attribute values is incomplete, it may miss invalid values. AI-agent false negatives are less predictable than rule-engine false negatives, because they vary with the model version, the prompt, and the content provided.</dd>
334
- </dl>
335
-
336
- <h4>Expertise required</h4>
337
- <dl class="compare">
338
- <dt>Human testing</dt>
339
- <dd>High. The tester needs working knowledge of WCAG, ARIA, and HTML semantics, plus proficiency with a screen reader.</dd>
340
- <dt>Rule-engine testing</dt>
341
- <dd>Low to moderate. Running a tool requires little accessibility knowledge. Interpreting results and determining whether flagged items are genuine violations requires more. Reviewing results across a large ensemble requires the ability to reconcile conflicting findings.</dd>
342
- <dt>AI-agent testing</dt>
343
- <dd>Low to moderate for basic use; moderate for reliable use. Formulating precise and effective instructions, understanding the limits of AI output, and critically reviewing findings all benefit from accessibility knowledge.</dd>
344
- </dl>
345
-
346
- <h4>Best suited for</h4>
347
- <dl class="compare">
348
- <dt>Human testing</dt>
349
- <dd>Final verification before release; issues requiring judgment about context, meaning, or usability; interaction flows; screen reader experience; and any issue that requires lived experience with assistive technology.</dd>
350
- <dt>Rule-engine testing</dt>
351
- <dd>First-pass scanning across many pages; continuous integration checks; measuring progress over time; identifying well-defined structural violations efficiently; and providing a baseline before human testing.</dd>
352
- <dt>AI-agent testing</dt>
353
- <dd>Focused analysis of specific issue types when sufficient page content can be provided; assessment of issues that are too nuanced for mechanical rules but too common for full human review; and generating explanations and remediation guidance.</dd>
354
- </dl>
355
- </section>
356
-
357
- <!-- ─── ─── ─── ─── -->
358
- <section id="case-study-intro">
359
- <h3>Introducing the case study</h3>
360
- <p class="time-estimate">About 5 minutes.</p>
361
-
362
- <p>The practicum applies all three strategies to a single type of accessibility issue on a specific web page. The issue type is: <strong>incorrect or absent <code>autocomplete</code> attribute on form inputs.</strong></p>
363
-
364
- <h4>Why the <code>autocomplete</code> attribute matters</h4>
365
- <p>The <code>autocomplete</code> attribute tells the browser what kind of personal information an input field collects. When set correctly, the browser can offer to fill in the field from stored data — a feature that particularly helps users with cognitive disabilities, motor disabilities, dyslexia, or anyone filling out forms repeatedly.</p>
366
- <p>WCAG 2.1 and 2.2 Success Criterion <a href="https://www.w3.org/WAI/WCAG22/Understanding/identify-input-purpose">1.3.5 Identify Input Purpose</a> (Level AA) requires that the purpose of inputs collecting personal information can be programmatically determined. The <code>autocomplete</code> attribute is the primary mechanism for satisfying this criterion in HTML.</p>
367
-
368
- <h4>Valid attribute values</h4>
369
- <p>On <code>&lt;input&gt;</code>, <code>&lt;select&gt;</code>, and <code>&lt;textarea&gt;</code> elements, the <code>autocomplete</code> attribute must have a value from the <a href="https://html.spec.whatwg.org/multipage/form-control-infrastructure.html#autofill">defined list of autofill tokens</a>. Common values include:</p>
267
+ <p>AI agents tend to have trouble getting the information they need for accessibility testing of a deployed web page. They may need the source code, the HTML after any scripts have been executed, the <dfn>accessibility tree</dfn> (an accessibility-related simplification of the code), a graphic image (screenshot) of the page, and access to the live page so they can interact with it. Because of limited capabilities and security restrictions, they are likely to be unable to get all of that and handle it efficiently.</p>
268
+ <p>The most commonly used AI agents are powered by large language models. Such a model is trained to give you answers that are highly probable. <q>Sorry, I don&rsquo;t have full access to that web page</q> is often the right answer, but an improbable one, so you most likely will get a fabrication instead. The model will use its vast knowledge to imagine what a page would contain and base its answer on that.</p>
269
+ <p>When an AI agent does this, it is called a <q>hallucination</q>. When a human does it, it is called <q>fraud</q>.</p>
270
+ <p>The AI industry is making progress in ensuring that general-purpose AI agents, when given tasks that are impossible for them, delegate those tasks to competent tools and specialized AI agents. At present, however, you cannot expect AI agents powered by the mainstream foundation models to reliably navigate to a web page and diagnose accessibility defects on it.</p>
271
+ <p>You have other options, however. If you want an AI agent to test a deployed web page, you can supply to the agent the above-listed documents that it cannot otherwise obtain. Even then:</p>
370
272
  <ul>
371
- <li><code>name</code>, <code>given-name</code>, <code>family-name</code></li>
372
- <li><code>email</code></li>
373
- <li><code>tel</code></li>
374
- <li><code>street-address</code>, <code>postal-code</code>, <code>country</code></li>
375
- <li><code>username</code>, <code>new-password</code>, <code>current-password</code></li>
273
+ <li>Acquiring and providing these documents costs you time, detracting from the time-saving promise of AI assistance.</li>
274
+ <li>You risk overloading the agent with more data than it can efficiently process.</li>
275
+ <li>You are not giving the agent access to the live page, so defects that become detectable only during interactions with the page cannot be found.</li>
376
276
  </ul>
377
- <p>On a <code>&lt;form&gt;</code> element, the <code>autocomplete</code> attribute is also valid but with an important restriction: the only valid values for a <code>&lt;form&gt;</code> element are <code>on</code> and <code>off</code>. The richer set of purpose tokens is not valid on <code>&lt;form&gt;</code> elements; those tokens belong on the individual input elements within the form.</p>
378
-
379
- <h4>Two types of violation</h4>
380
- <p>This practicum focuses on two specific violations of WCAG 1.3.5:</p>
277
+ <p>If you are <strong>developing</strong> a website, you can ask an AI agent to look for accessibility defects in your codebase. Agents that are integrated into development environments or pipelines can do this and may find issues that were not discovered by rule engines.</p>
278
+ <p>Here is one example. The Kilotest home page, when tested by Kilotest itself, is reported to have zero issues. But, when an AI agent was given local access to the Kilotest codebase in June 2026, the agent reported two accessibility issues on the home page. One was that the list of things to do was not wrapped in a <code>nav</code> element, so assistive technologies would not know that it is a list of navigation links. The other was that the <q>More than a thousand? Really?</q> line was not coded as a heading, even though it acted as a heading for the introduction to Kilotest that opens when you click that line.</p>
279
+ <p>These two issues were debatable, but you may want to make decisions about debatable issues, and an AI agent can make you aware of them.</p>
280
+ <h4>Hybrid strategies</h4>
281
+ <p>In reality, none of the above three strategies is pure.</p>
381
282
  <ul>
382
- <li><strong>Missing attribute:</strong> An input collecting personal information has no <code>autocomplete</code> attribute at all.</li>
383
- <li><strong>Invalid attribute value:</strong> An element has an <code>autocomplete</code> attribute, but its value is not among those recognised by the HTML specification for that element type.</li>
283
+ <li>Human testers use automation, including browser extensions, bookmarklets, and assistive technologies.</li>
284
+ <li>Rule engines can present dubious cases to humans for decisions and can get help from AI agents to test for complex standards.</li>
285
+ <li>AI agents can use rule-based tools and depend on humans to provide effective instructions and data.</li>
384
286
  </ul>
385
- <p>Both violations prevent browsers and assistive technologies from identifying the purpose of the affected fields.</p>
287
+ </section>
288
+ <section id="orchestrating">
289
+ <h3>Orchestrating testing strategies</h3>
290
+ <h4>Sequence</h4>
291
+ <p>The most mature accessibility testing practice would use all three strategies. Given their capabilities, wait times, and costs, a practical approach likely starts testing with <strong>rule engines</strong> and repairs the defects that they discover; then asks <strong>AI agents</strong> whether they can find any defects that still remain and, if so, repairs those; and finally leverages <strong>human testers</strong>. That gives humans a cleaner, more consistent, and more compliant product to test than they often work on, saving time and money. They can focus on discovering tricky issues they might otherwise overlook. Your website becomes even better.</p>
292
+ <h4>Quality control</h4>
293
+ <p>Accessibility testing is a type of quality control, but software tests, including accessibility tests, <strong>need</strong> quality control, too. Human testers make mistakes. Rule engines make mistakes. AI agents make mistakes. Moreover, accessibility is partly judgmental, so whether a tester made a <q>mistake</q> may be debatable.</p>
294
+ <p>Competitive testing can provide some internal quality control. Ensemble testing with multiple rule engines, and testing by a combination of rule engines, AI agents, and humans, let you identify inter-tester disagreements and investigate those. Your findings can help you improve not only your websites but also the testing process.</p>
386
295
  </section>
387
296
  </section>
388
297
  <section id="practicum" class="main">
389
298
  <h2>Part 2: Practicum</h2>
390
- <p>This practicum examines a single real-world web page for <code>autocomplete</code> attribute issues. The page is a public website homepage captured in May 2026. <strong>The name and URL of the site are withheld</strong> to avoid public-relations concerns unrelated to this tutorial. The page facts below are a snapshot; the live page may differ.</p>
391
- <section id="example-page">
392
- <h3>The example page</h3>
393
- <p class="time-estimate">About 3 minutes.</p>
394
-
395
- <p>The example page is the homepage of a nonprofit organisation in the career-guidance sector. It is a publicly accessible, JavaScript-rendered WordPress site.</p>
396
-
397
- <h4>Relevant page content</h4>
398
- <p>The page contains a newsletter sign-up form. This form is not present in the raw HTML that the server delivers. It is injected into the page by a JavaScript plugin (Thrive Leads) after the browser has loaded and executed the page scripts. The form contains one visible user-facing input field:</p>
299
+ <section id="issue">
300
+ <h3>The issue</h3>
301
+ <p>Let&rsquo;s get concrete now. Out of dozens of accessibility standards, we shall examine one: <strong>input purpose identification</strong>.</p>
302
+ <p>Here is the problem. If you are active on the web, you probably fill out forms often, and you may be annoyed at how often you need to enter the same information about yourself, such as username, name, date of birth, telephone number, email address, postal address, password, and health history. The input purpose identification standard says that a web form asking for some of this information must cooperate with your browser to help get the inputs completed automatically. This helps all users, and especially those who have trouble remembering details and typing fast and accurately.</p>
303
+ <p>Input purpose identification is one of the standards codified by the Web Content Accessibility Guidelines (<abbr title="Web Content Accessibility Guidelines">WCAG</abbr>), where it is called <a href="https://www.w3.org/WAI/WCAG22/Understanding/identify-input-purpose">Success Criterion 1.3.5</a>. There is a <a href="https://www.w3.org/TR/WCAG22/#input-purposes">list of input purposes subject to the standard</a>. Any <code>input</code>, <code>textarea</code>, or <code>select</code> element with one of those purposes must have an <code>autocomplete</code> attribute with the corresponding value. For example, if it is a username input, the <code>input</code> element must have an <code>autocomplete="username"</code> attribute. Some other commonly used values are:</p>
304
+ <ul>
305
+ <li><code>name</code>, <code>given-name</code>, <code>family-name</code></li>
306
+ <li><code>email</code>, <code>tel</code></li>
307
+ <li><code>street-address</code>, <code>postal-code</code>, <code>country</code></li>
308
+ <li><code>new-password</code>, <code>current-password</code></li>
309
+ </ul>
310
+ <p>A <code>form</code> element may have an <code>autocomplete</code> attribute with an <code>"on"</code> or <code>"off"</code> value to permit or prohibit automated browser completion of all its data, except that <code>autocomplete</code> attributes on any elements within the form take precedence.</p>
311
+ </section>
312
+ <section id="page">
313
+ <h3>The page</h3>
314
+ <p>We shall look for this single issue on a single web page. It is the home page of a JavaScript-rendered WordPress site belonging to a nonprofit organization. It was tested in May 2026.</p>
315
+ <p>The page contains a form for requesting a guide published by the organization. This form is not present in the raw HTML that the server delivers, but is injected into the page by a JavaScript plugin after the browser has loaded and executed the page scripts. The form contains one visible user-facing input field. Here is a screenshot of the form:</p>
399
316
  <img
400
317
  src="/tutorial/images/newsletter-form.png"
401
- alt='The newsletter sign-up form as rendered in the browser: an email input with placeholder text "> Enter your email" and a button labelled "Get the career guide", set against a dark-green decorative background.'
402
- class="tutorial-img"
318
+ alt="Career-guide request form containing an input with placeholder text Enter your email and a button labelled Get the career guide"
403
319
  width="602"
404
320
  height="57"
405
321
  >
406
- <pre class="code-sample"><code>&lt;input type="email" data-field="email" data-required="1"
407
- data-validation="email" name="email"
408
- placeholder="&gt; Enter your email"&gt;</code></pre>
409
- <p>The form element wrapping this input is:</p>
410
- <pre class="code-sample"><code>&lt;form action="#" method="post" novalidate="novalidate"
411
- autocomplete="new-password"&gt;</code></pre>
412
- <p>Both elements were captured from the rendered DOM on 2026-05-30 using a headless browser. They are the only non-hidden, non-search, non-consent form inputs on the page.</p>
413
- <h4>The two defects</h4>
414
- <p>Two <code>autocomplete</code> attribute issues are present:</p>
415
- <ul>
416
- <li><strong>Issue A</strong> — The <code>&lt;input type="email"&gt;</code> element has no <code>autocomplete</code> attribute. Per WCAG 1.3.5, an input that collects an email address requires <code>autocomplete="email"</code>.</li>
417
- <li><strong>Issue B</strong> — The <code>&lt;form&gt;</code> element has <code>autocomplete="new-password"</code>. On a <code>&lt;form&gt;</code> element, the only valid values are <code>on</code> and <code>off</code>. <code>new-password</code> is a valid token for <code>&lt;input&gt;</code> elements, not for <code>&lt;form&gt;</code> elements. This value appears to be a misapplication of a technique sometimes used to suppress browser autofill on individual input fields.</li>
418
- </ul>
322
+ <p>Here is the code for the form:</p>
323
+ <pre class="code-sample">&lt;form action="#" method="post" novalidate="novalidate" autocomplete="new-password"&gt;
324
+ &lt;input
325
+ type="email"
326
+ data-field="email"
327
+ data-required="1"
328
+ data-validation="email"
329
+ name="email"
330
+ placeholder="&gt; Enter your email"
331
+ &gt;
332
+ &lt;/form&gt;</pre>
419
333
  </section>
420
- <section id="human-testing">
421
- <h3>Human testing</h3>
422
- <p class="time-estimate">About 7 minutes.</p>
423
-
424
- <p>A human tester would approach this page using browser developer tools, keyboard navigation, and (optionally) a screen reader.</p>
425
-
426
- <h4>What a tester would do</h4>
427
- <ol>
428
- <li>Open the page in a browser and let it fully load.</li>
429
- <li>Identify interactive elements — in this case, the email input in the newsletter form.</li>
430
- <li>Open the browser's developer tools (typically F12 or right-click → Inspect).</li>
431
- <li>Select the email input in the Elements panel and examine its attributes.</li>
432
- <li>Note that the <code>autocomplete</code> attribute is absent.</li>
433
- <li>Select the enclosing <code>&lt;form&gt;</code> element and examine its attributes.</li>
434
- <li>Note that <code>autocomplete="new-password"</code> is set on the form.</li>
435
- <li>Recall (or look up) that <code>new-password</code> is not a valid value for a <code>&lt;form&gt;</code> element.</li>
436
- </ol>
437
-
438
- <h4>Findings</h4>
439
- <div class="finding">
440
- <p><strong>Issue A found?</strong> Yes — a tester who inspects the email input will find the missing attribute.</p>
441
- <p><strong>Issue B found?</strong> Uncertain. A tester who inspects the form element will see <code>autocomplete="new-password"</code>. Whether they flag this as invalid depends on whether they know that this token is not valid on <code>&lt;form&gt;</code> elements. Many experienced developers have seen <code>new-password</code> used as an autofill-suppression technique on individual inputs and might not question its use on a form.</p>
442
- </div>
443
-
444
- <h4>Limitations observed</h4>
445
- <p>The form is JavaScript-rendered and appears only after the page fully loads. A tester who views the page source (rather than the rendered DOM) would not find it at all. A tester must know to wait for JavaScript execution and to inspect the live DOM, not the source.</p>
446
- <p>Keyboard navigation and screen reader testing would reveal an additional issue — the email input has no label element (Issue A causes the input to be named only by its placeholder text, which is fragile) — but that is a separate issue not in scope here. Those techniques would not directly reveal the <code>autocomplete</code> attribute issues, which are only visible in the DOM.</p>
447
-
448
- <h4>Verdict on autocomplete issues</h4>
449
- <ul>
450
- <li><strong>Issue A (missing attribute on input):</strong> Found, if the tester inspects the input element.</li>
451
- <li><strong>Issue B (invalid value on form):</strong> Possibly missed, unless the tester knows the valid values for <code>autocomplete</code> on <code>&lt;form&gt;</code> elements.</li>
452
- </ul>
453
- </section>
454
- <section id="rule-engine-testing">
455
- <h3>Rule-engine testing</h3>
456
- <p class="time-estimate">About 7 minutes.</p>
457
-
458
- <p>Several rule engines were applied to this page. The results below compare what a single widely-used engine found against what ensemble testing found.</p>
459
-
460
- <h4>Single-engine result: axe-core</h4>
461
- <p><a href="https://github.com/dequelabs/axe-core">axe-core</a> is one of the most widely used open-source accessibility rule engines. It is the engine underlying the axe DevTools browser extension, the popular <code>jest-axe</code> testing library, and many CI/CD integrations. It was run against the fully rendered DOM of the example page in May 2026.</p>
462
- <div class="finding">
463
- <p><strong>axe-core result on <code>autocomplete</code> issues: 0 violations found.</strong></p>
464
- <p>axe-core's <code>autocomplete-valid</code> rule checks that <code>autocomplete</code> attribute values, when present, are from the valid list. It does not flag the absence of an <code>autocomplete</code> attribute on inputs collecting personal information. Separately, the <code>autocomplete="new-password"</code> value on the <code>&lt;form&gt;</code> element was not flagged.</p>
465
- </div>
466
- <p>Both autocomplete issues were false negatives for axe-core.</p>
467
-
468
- <h4>Ensemble result: Kilotest</h4>
469
- <p>Kilotest applied 10 tools to the same page in May 2026. Nine of those tools reported at least one issue on the page. Two tools specifically reported autocomplete issues:</p>
334
+ <section id="testing-page">
335
+ <h3>Testing the page</h3>
336
+ <h4>By rule engines</h4>
337
+ <p>What do rule engines find about this issue when they test this page? For a quick answer, Kilotest subjected the page to testing by all 10 rule engines in its ensemble. Two of the rule engines reported rule violations belonging to this issue:</p>
470
338
  <ul>
471
- <li><strong>Testaro</strong> reported <q>autocomplete missing</q> (Issue A: the email input lacks an <code>autocomplete</code> attribute). WCAG 1.3.5.</li>
472
- <li><strong>HTML CodeSniffer</strong> reported <q>autocomplete invalid</q> (Issue B: the <code>&lt;form&gt;</code> element has <code>autocomplete="new-password"</code>, which is not valid for that element type). WCAG 1.3.5.</li>
339
+ <li>The Testaro rule engine reported, <q>input has no autocomplete="email" attribute</q>. In other words, Testaro recognized this input as asking for the user&rsquo;s email address and therefore as requiring an <code>autocomplete="email"</code> attribute, but not having one.</li>
340
+ <li>The HTML CodeSniffer rule engine, referencing this <code>form</code> element, reported, <q>Invalid autocomplete value: new-password. Element does not belong to Password control group.</q> In other words, HTML CodeSniffer recognized that this form is not a password input (a form is not an input at all!) and therefore is not eligible for an <code>autocomplete="new-password"</code> attribute, but has one.</li>
473
341
  </ul>
474
- <div class="finding">
475
- <p><strong>Ensemble result: both autocomplete issues found</strong> — by different tools within the ensemble.</p>
476
- </div>
477
- <p>The full Kilotest report for this page is available at <a href="https://kilotest.com/reportIssues.html/260530T0032/n2u">kilotest.com (report 260530T0032/n2u)</a>. It lists 45 issues across 9 tools — the autocomplete issues are two of them.</p>
478
-
479
- <h4>Why the single engine missed both issues</h4>
480
- <p>axe-core's <code>autocomplete-valid</code> rule was designed to catch invalid values when an <code>autocomplete</code> attribute is present. It was not designed to enforce WCAG 1.3.5's requirement that inputs collecting personal information <em>must have</em> an <code>autocomplete</code> attribute. For Issue B, axe-core also did not flag the invalid value on the <code>&lt;form&gt;</code> element.</p>
481
- <p>This is not a criticism of axe-core specifically. Every rule engine has coverage gaps. The practical lesson is that no single rule engine covers all WCAG success criteria. Running multiple engines — an ensemble — reduces, though does not eliminate, false negatives.</p>
482
-
483
- <h4>Verdict on autocomplete issues</h4>
484
- <ul>
485
- <li><strong>Issue A (missing attribute on input):</strong> Missed by axe-core; found by Testaro in the Kilotest ensemble.</li>
486
- <li><strong>Issue B (invalid value on form):</strong> Missed by axe-core; found by HTML CodeSniffer in the Kilotest ensemble.</li>
487
- </ul>
488
- </section>
489
- <section id="ai-testing">
490
- <h3>AI-agent testing</h3>
491
- <p>An AI agent was asked to find <code>autocomplete</code> attribute issues on the example page. Two scenarios were tested, illustrating how the content given to the agent affects what it can find.</p>
492
- <h4>What the AI agent was given and asked</h4>
493
- <p>In both scenarios, the instructions were the same:</p>
494
- <div class="ai-output">
495
- <p><strong>Instruction to the AI agent:</strong></p>
496
- <p>The following content is from a web page. Identify all violations of WCAG 2.2 Success Criterion 1.3.5 (Identify Input Purpose) related to the <code>autocomplete</code> attribute. For each violation, state the element, the problem, and the correction required. Be precise about which values are valid for each element type.</p>
497
- </div>
498
- <h4>Scenario 1: AI agent given the raw HTML source</h4>
499
- <p>The raw HTML delivered by the server was provided to the AI agent. This HTML does not contain the newsletter form, which is injected by JavaScript after page load.</p>
500
- <div class="ai-output">
501
- <p><strong>Model used:</strong> Claude claude-sonnet-4 (Anthropic), May 2026.</p>
502
- <p><strong>AI response (summary):</strong></p>
503
- <p>I can see a search input (<code>&lt;input type="search" name="s"&gt;</code>) in the provided HTML. This element does not have an <code>autocomplete</code> attribute. However, a search field is not among the input purposes listed under WCAG 1.3.5 — it does not collect personal information — so no violation is present. I find no violations of WCAG 1.3.5 in the provided HTML.</p>
504
- </div>
505
- <div class="finding">
506
- <p><strong>Result: Both issues missed — 100% false negative rate.</strong></p>
507
- <p>The AI agent correctly analysed what it was given, and correctly found no violations in the raw HTML. But the raw HTML does not contain the newsletter form. The AI agent could not report what it was not given. This is not a failure of the agent's reasoning — it is a failure of the data provided to it.</p>
508
- </div>
509
- <h4>Scenario 2: AI agent given the rendered DOM</h4>
510
- <p>The HTML of the fully rendered DOM — captured after JavaScript execution — was provided to the AI agent. This HTML includes the newsletter form with its email input and the form element.</p>
511
- <div class="ai-output">
512
- <p><strong>Model used:</strong> Claude claude-sonnet-4 (Anthropic), May 2026.</p>
513
- <p><strong>AI response (summary):</strong></p>
514
- <p>I have identified two violations of WCAG 2.2 Success Criterion 1.3.5:</p>
515
- <p><strong>Violation 1: Missing <code>autocomplete</code> attribute on email input.</strong><br>
516
- Element: <code>&lt;input type="email" name="email" placeholder="&gt; Enter your email"&gt;</code><br>
517
- Problem: This input collects an email address but has no <code>autocomplete</code> attribute. WCAG 1.3.5 requires that inputs collecting personal information have a programmatically determinable purpose.<br>
518
- Correction: Add <code>autocomplete="email"</code>.</p>
519
- <p><strong>Violation 2: Invalid <code>autocomplete</code> value on the <code>&lt;form&gt;</code> element.</strong><br>
520
- Element: <code>&lt;form autocomplete="new-password"&gt;</code><br>
521
- Problem: On a <code>&lt;form&gt;</code> element, the only valid values for <code>autocomplete</code> are <code>on</code> and <code>off</code>. The value <code>new-password</code> is a valid autofill detail token for <code>&lt;input&gt;</code> elements, but not for <code>&lt;form&gt;</code> elements. Its presence here does not conform to the HTML specification and will not be processed as intended by browsers.<br>
522
- Correction: Replace <code>autocomplete="new-password"</code> with <code>autocomplete="off"</code> if the intent is to suppress autofill on the form. Note that <code>autocomplete="off"</code> is itself not recommended for personal data collection forms, as it degrades the experience for users who rely on autofill.</p>
523
- </div>
524
- <div class="finding">
525
- <p><strong>Result: Both issues correctly found — when given the right input.</strong></p>
526
- <p>With the rendered DOM, the AI agent identified both violations accurately and provided correct remediation advice. It also flagged that suppressing autocomplete on a personal data form is itself a usability concern for users who rely on autofill — a nuance that rule engines do not typically report.</p>
527
- </div>
528
- <h4>What this demonstrates</h4>
529
- <p>The two scenarios show that the effectiveness of AI-agent testing depends critically on what the AI agent receives, not only on the capability of the underlying model. Providing the rendered DOM rather than the raw HTML source is the difference between finding both issues and finding neither.</p>
530
- <p>This has a direct practical implication: when using AI agents for accessibility testing, you must understand whether the page's content is server-rendered or JavaScript-rendered. If it is JavaScript-rendered, the agent must receive the rendered DOM, not the raw source.</p>
531
- <p>It also illustrates a limitation that affects all three strategies: all depend on the tester (human, rule engine, or AI agent) receiving the full relevant content. Differences in what each strategy can access — raw source, rendered DOM, accessibility tree, visual rendering — partly determine what each strategy can find.</p>
532
- <h4>Verdict on autocomplete issues</h4>
342
+ <p>Both of these findings are correct.</p>
533
343
  <ul>
534
- <li><strong>Issue A (missing attribute on input):</strong> Missed when given raw HTML; found when given rendered DOM.</li>
535
- <li><strong>Issue B (invalid value on form):</strong> Missed when given raw HTML; found when given rendered DOM.</li>
344
+ <li>The input asks for the user&rsquo;s email address and therefore requires an <code>autocomplete="email"</code> attribute but has none; Testaro discovered and reported this.</li>
345
+ <li>The form is permitted to have an <code>autocomplete</code> attribute, but its value must be only <code>"on"</code> or <code>"off"</code>, so the actual value of <code>"new-password"</code> is invalid; HTML CodeSniffer discovered this attribute and reported that the <code>form</code> element was not eligible to have it with the <code>new-password</code> value, but had it.</li>
536
346
  </ul>
537
- <p><strong>Note on AI results:</strong> The AI testing results were captured in May 2026 from a specific model. As AI models and the resources available to them improve, results may change potentially for better or worse. The captured results are preserved here as a fixed point of comparison.</p>
538
- </section>
539
- <section id="comparison">
540
- <h3>Comparing the strategies</h3>
541
- <p class="time-estimate">About 6 minutes.</p>
542
-
543
- <p>The practicum produced a clear picture of what each strategy found on the example page:</p>
544
-
545
- <h4>What each strategy found</h4>
546
- <dl class="compare">
547
- <dt>Human testing</dt>
548
- <dd>Found Issue A (missing attribute) on direct inspection. Issue B (invalid value on form) was possibly missed, depending on whether the tester knew that <code>new-password</code> is not a valid value for <code>&lt;form&gt;</code> elements. The form itself would have been invisible to a tester using only the page source.</dd>
549
- <dt>Rule-engine testing (single engine)</dt>
550
- <dd>Found neither issue. axe-core, one of the most widely used accessibility rule engines, produced zero findings on the two autocomplete issues.</dd>
551
- <dt>Rule-engine testing (ensemble)</dt>
552
- <dd>Found both issues, through two different tools in the ensemble: Testaro found Issue A; HTML CodeSniffer found Issue B.</dd>
553
- <dt>AI-agent testing (raw HTML)</dt>
554
- <dd>Found neither issue. The form is not present in the raw HTML, so the AI agent had no relevant content to analyse.</dd>
555
- <dt>AI-agent testing (rendered DOM)</dt>
556
- <dd>Found both issues, with accurate explanations and correct remediation advice.</dd>
557
- </dl>
558
-
559
- <h4>Lessons from the case study</h4>
560
-
561
- <details>
562
- <summary>Lesson 1: What you provide determines what gets found</summary>
563
- <p>Every testing strategy operates on a representation of the page — raw source, rendered DOM, visual rendering, or accessibility tree. Each representation contains different information. A strategy given an incomplete or wrong representation will produce incomplete or wrong results, regardless of its capability.</p>
564
- <p>In this case study, both rule-engine tools that found the issues used the rendered DOM (via a headless browser). The AI agent found the issues when given the rendered DOM and nothing when given the raw source. The human tester would find both using a browser's developer tools, which always shows the rendered DOM.</p>
565
- <form class="knowledge-check" id="lesson1-quiz" novalidate>
566
- <fieldset>
567
- <legend>Knowledge check</legend>
568
- <p>In this case study, an AI agent was given the raw HTML source of the example page and asked to identify <code>autocomplete</code> violations. What did it find?</p>
569
- <ol>
570
- <li><label><input type="radio" name="q1" value="a"> Both issues: the missing attribute on the email input and the invalid value on the form element.</label></li>
571
- <li><label><input type="radio" name="q1" value="b"> The missing <code>autocomplete</code> attribute on the email input only.</label></li>
572
- <li><label><input type="radio" name="q1" value="c"> No violations, because the newsletter form is present only in the rendered DOM, not in the raw HTML.</label></li>
573
- <li><label><input type="radio" name="q1" value="d"> The invalid <code>autocomplete</code> value on the form element only.</label></li>
574
- </ol>
575
- <button type="submit">Check answer</button>
576
- </fieldset>
577
- <div role="status" id="lesson1-quiz-feedback" aria-live="polite" aria-atomic="true" class="quiz-feedback"></div>
578
- </form>
579
- </details>
580
-
581
- <details>
582
- <summary>Lesson 2: No single rule engine is complete</summary>
583
- <p>axe-core is a capable, well-maintained engine used by millions of developers. It found neither autocomplete issue on this page. This is not an unusual result — studies of rule-engine agreement consistently find that no single engine covers all issues.</p>
584
- <p>Ensemble testing, by combining multiple engines, found both issues. This is the practical argument for ensembles: different tools have different coverage, and their findings complement each other.</p>
585
- </details>
586
-
587
- <details>
588
- <summary>Lesson 3: Some issues are harder for humans than for machines</summary>
589
- <p>Issue B — the invalid <code>autocomplete</code> value on the <code>&lt;form&gt;</code> element — illustrates an important pattern. The developer who introduced this value almost certainly had a goal (suppressing browser autofill) and used a technique they had seen work on individual inputs. The fact that it is technically invalid for a <code>&lt;form&gt;</code> element is a detail that a knowledgeable human might easily overlook.</p>
590
- <p>An appropriate rule engine, however, checks the value against a precise specification and reports the violation without ambiguity. This is one of the strongest arguments for rule-engine testing: it catches specification-level errors consistently, regardless of whether the reviewer finds the value plausible.</p>
591
- </details>
592
-
593
- <details>
594
- <summary>Lesson 4: The AI limitation encountered during this tutorial's development</summary>
595
- <p>This tutorial was developed with AI assistance. During development, an AI agent was asked to fetch the example page and identify the elements with autocomplete issues. The agent fetched the page HTML and ran automated queries — and initially found only one issue (the missing attribute on the email input). It did not find the invalid value on the <code>&lt;form&gt;</code> element.</p>
596
- <p>The reason is instructive. The agent queried for <code>&lt;input&gt;</code>, <code>&lt;select&gt;</code>, and <code>&lt;textarea&gt;</code> elements, but did not initially think to query for the <code>&lt;form&gt;</code> element's <code>autocomplete</code> attribute. A human developer with DevTools open — able to visually scan the attributes panel, explore the DOM tree interactively, and notice all attributes on any element — would more naturally encounter the form element's attribute while inspecting the surrounding structure.</p>
597
- <p>This experience illustrates a real limitation of current AI agents: even though an agent can rapidly process large amounts of text, it does not yet replicate the fluid, exploratory experience of a developer browsing to a page and inspecting its DOM. That exploratory capacity remains a strength of human testing.</p>
598
- </details>
347
+ <p>You might think that this shows rule engines were effective in revealing the defects related to this issue, but this is not a success story. Yes, two rule engines found related defects, but the other eight rule engines did not. Inspection of those eight rule engines reveals that six of them include rules enforcing this standard in some way. So, did these six rule engines produce false negatives? Did they fail to enforce their applicable rules here? Perhaps, but perhaps not. Suppose a rule engine has a rule that requires the value of any <code>autocomplete</code> attribute to be in the list of allowed values. Well, <code>new-password</code> is in the list. So that rule engine would, properly, not report a violation of <strong>that</strong> rule.</p>
348
+ <p>We can clearly see from this example that no rule engine will find all accessibility defects, not even all defects that <strong>can</strong> be detected by rule engines. That is why Kilotest uses an <strong>ensemble</strong> of rule engines.</p>
349
+ <p>Of course, running ten rule engines also multiplies the risk of false positives, because any one of the ten may report a defect that is not really there. In this particular case, there were none. For example, no rule engine claimed to find a defect related to this issue someplace else on the page. Kilotest, like any platform that integrates multiple rule engines, faces greater risks of false positives than a single rule engine would. It combats false positives by investigating user complaints and deprecating rules whose tests are found faulty.</p>
350
+ <h4>By AI agents</h4>
351
+ <p>An AI agent powered by a frontier model was asked to visit the page and report any accessibility defect related to <code>autocomplete</code> attributes. The agent worked for about 3 minutes and then reported a missing <code>autocomplete="email"</code> attribute in the newsletter subscription form. But the page has no newsletter subscription form! It has a form to request a guide, not to subscribe to anything. Moreover, the agent provided code to support its conclusion, and the code differed pervasively from the actual code on the page. This response was a complete fabrication. The agent confessed to it, explaining that it resorted to hallucination when its security restrictions prevented it from accessing the necessary page data.</p>
352
+ <p>A similar AI agent returned correct diagnoses when it was fed the page as an HTML document. That worked, because the two defects relevant to this practicum are discoverable from the HTML of the rendered page. Any defects arising from stylesheets or scripts would have been missed.</p>
353
+ <p>Even when an agent discovered both defects, it added some misleading advice: <q>Replace <code>autocomplete="new-password"</code> with <code>autocomplete="off"</code> if the intent is to suppress autofill on the form. Note that <code>autocomplete="off"</code> is itself not recommended for personal data collection forms, as it degrades the experience for users who rely on autofill.</q> The agent implied that setting <code>autocomplete</code> to <code>"off"</code> in the <code>form</code> element would nullify an <code>autocomplete="email"</code> in the form&rsquo;s <code>input</code> element, but that is wrong, as explained in <a href="#issue">The issue</a> above: an <code>autocomplete</code> attribute on an element within a form takes precedence over the form&rsquo;s own.</p>
354
+ <h4>By humans</h4>
355
+ <p>Human testers would handle the defects for this practicum differently, depending on <strong>how</strong> they test.</p>
356
+ <p>At the simplest, a human tester acts like a typical user. A tester doing this would not see the <code>form</code> element and so would not be able to report its defective <code>autocomplete</code> attribute, because that defect would not prevent completing the email input. When clicking inside the email input, the tester would likely see an autocompletion popup with one or more email addresses. That would be the browser misbehaving, treating the input as asking for the user&rsquo;s email address merely because the input has a <code>type="email"</code> attribute. This is a lucky guess here, but would be a mistake if the input were asking for the email address of an emergency contact. Thus, because of an overzealous browser, the tester would not be aware of any defect in the input either.</p>
357
+ <p>A more complex type of human testing is to use some atypical navigation method or some assistive technology while using the page. A tester doing this would likewise not notice the <code>form</code> defect, and might or might not discover the missing <code>autocomplete</code> attribute on the <code>input</code> element, depending on the details of the navigation method and the behavior of the browser.</p>
358
+ <p>The most exhaustive type of human testing includes not only using the page in various ways but also inspecting the code, the styles, and the accessibility tree by means of the browser developer tools. This would allow an attentive human tester to find both the improper <code>autocomplete</code> attribute on the <code>form</code> element and the missing <code>autocomplete</code> attribute on the <code>input</code> element.</p>
599
359
  </section>
600
360
  </section>
601
- <section id="conclusion" class="main">
602
- <h2>Conclusion</h2>
603
- <p class="time-estimate">About 5 minutes.</p>
604
- <p>You have seen all three accessibility testing strategies applied to the same page and the same issue type. Each strategy has genuine strengths and genuine limitations. The key points to take away:</p>
605
- <ul>
606
- <li><strong>Use all three.</strong> The strategies are complements, not alternatives. Rule engines provide fast, consistent first-pass coverage. AI agents can reason about nuance and explain issues. Human testing catches what the others miss and is the ultimate measure of real-world usability.</li>
607
- <li><strong>Understand what each strategy sees.</strong> A strategy can only report on what it receives. For JavaScript-rendered content, ensure your tools — rule engines, AI agents, or human testers — are working with the rendered DOM, not the raw HTML source.</li>
608
- <li><strong>Prefer ensembles over single tools.</strong> No rule engine covers all WCAG success criteria. Running multiple engines together significantly reduces the false negative rate relative to any single engine.</li>
609
- <li><strong>Verify AI output.</strong> AI agents can produce both false positives and false negatives. Their output is a useful starting point, not a final verdict. Critical review against the relevant standards is essential.</li>
610
- <li><strong>Test early and often.</strong> The cheapest time to fix an accessibility issue is before the page is shipped. Rule-engine testing integrated into a development workflow catches issues while the code is fresh and the fix is small.</li>
611
- </ul>
361
+ <section id="knowledge-check" class="main">
362
+ <h2>Knowledge check</h2>
363
+ <details>
364
+ <summary>Accessibility testing by AI agents</summary>
365
+ <form class="knowledge-check" id="lesson1-quiz" novalidate>
366
+ <fieldset>
367
+ <legend>Choose the best answer</legend>
368
+ <p>Hallucination by an AI agent:</p>
369
+ <ol>
370
+ <li><label><input type="radio" name="q1" value="a"> is unlikely when you give the agent a precise URL</label></li>
371
+ <li><label><input type="radio" name="q1" value="b"> can take the form of a totally fictional answer</label></li>
372
+ <li><label><input type="radio" name="q1" value="c"> is usually obvious</label></li>
373
+ <li><label><input type="radio" name="q1" value="d"> is no longer a problem</label></li>
374
+ </ol>
375
+ <button type="submit">Check answer</button>
376
+ </fieldset>
377
+ <div role="status" id="lesson1-quiz-diagnosis" aria-live="polite" aria-atomic="true" class="quiz-diagnosis"></div>
378
+ </form>
379
+ </details>
380
+ <details>
381
+ <summary>Accessibility testing by humans</summary>
382
+ <form class="knowledge-check" id="lesson2-quiz" novalidate>
383
+ <fieldset>
384
+ <legend>Choose the best answer</legend>
385
+ <p>This orientation asserts that it is probably optimal for humans to do accessibility testing:</p>
386
+ <ol>
387
+ <li><label><input type="radio" name="q2" value="a"> after rule-engine and AI-agent testing and after the defects they discovered have been corrected</label></li>
388
+ <li><label><input type="radio" name="q2" value="b"> after rule-engine and AI-agent testing but before any defect correction begins</label></li>
389
+ <li><label><input type="radio" name="q2" value="c"> before any other testing</label></li>
390
+ <li><label><input type="radio" name="q2" value="d"> after rule-engine testing but before AI-agent testing</label></li>
391
+ </ol>
392
+ <button type="submit">Check answer</button>
393
+ </fieldset>
394
+ <div role="status" id="lesson2-quiz-diagnosis" aria-live="polite" aria-atomic="true" class="quiz-diagnosis"></div>
395
+ </form>
396
+ </details>
397
+ <details>
398
+ <summary>Practicum topic</summary>
399
+ <form class="knowledge-check" id="lesson3-quiz" novalidate>
400
+ <fieldset>
401
+ <legend>Choose the best answer</legend>
402
+ <p>This practicum dealt with an issue that can occur in:</p>
403
+ <ol>
404
+ <li><label><input type="radio" name="q3" value="a"> HTTP</label></li>
405
+ <li><label><input type="radio" name="q3" value="b"> HTML</label></li>
406
+ <li><label><input type="radio" name="q3" value="c"> CSS</label></li>
407
+ <li><label><input type="radio" name="q3" value="d"> JavaScript</label></li>
408
+ </ol>
409
+ <button type="submit">Check answer</button>
410
+ </fieldset>
411
+ <div role="status" id="lesson3-quiz-diagnosis" aria-live="polite" aria-atomic="true" class="quiz-diagnosis"></div>
412
+ </form>
413
+ </details>
414
+ <details>
415
+ <summary>Practicum testing</summary>
416
+ <form class="knowledge-check" id="lesson4-quiz" novalidate>
417
+ <fieldset>
418
+ <legend>Choose the best answer</legend>
419
+ <p>In this practicum, which rule engines discovered both <code>autocomplete</code> defects?</p>
420
+ <ol>
421
+ <li><label><input type="radio" name="q4" value="a"> All 10 rule engines</label></li>
422
+ <li><label><input type="radio" name="q4" value="b"> Six of the rule engines</label></li>
423
+ <li><label><input type="radio" name="q4" value="c"> Only Testaro and HTML CodeSniffer</label></li>
424
+ <li><label><input type="radio" name="q4" value="d"> None of the rule engines</label></li>
425
+ </ol>
426
+ <button type="submit">Check answer</button>
427
+ </fieldset>
428
+ <div role="status" id="lesson4-quiz-diagnosis" aria-live="polite" aria-atomic="true" class="quiz-diagnosis"></div>
429
+ </form>
430
+ </details>
612
431
  </section>
613
432
  <section id="further-reading" class="main">
614
433
  <h2>Further reading</h2>
@@ -622,7 +441,6 @@
622
441
  </ul>
623
442
  </section>
624
443
  <section id="suggest-improvement" class="main">
625
- <!--<div class="comment-section" id="feedback-section">-->
626
444
  <h2>Suggest improvements</h2>
627
445
  <p>Please submit suggestions for improving this tutorial below. The Kilotest managers will review them.</p>
628
446
  <form id="comment-form" novalidate>
@@ -639,8 +457,7 @@
639
457
  <p class="char-count" id="char-count-msg"><span id="char-remaining">500</span> characters remaining</p>
640
458
  <button type="submit" id="comment-submit">Submit suggestions</button>
641
459
  </form>
642
- <div role="status" id="comment-feedback" aria-live="polite" aria-atomic="true" class="comment-feedback"></div>
643
- <!--</div>-->
460
+ <div role="status" id="comment-diagnosis" aria-live="polite" aria-atomic="true" class="comment-diagnosis"></div>
644
461
  </section>
645
462
  </main>
646
463
  <script>
@@ -648,28 +465,131 @@
648
465
  document.getElementById('lesson1-quiz').addEventListener('submit', function (e) {
649
466
  e.preventDefault();
650
467
  const selected = this.querySelector('input[name="q1"]:checked');
651
- const feedback = document.getElementById('lesson1-quiz-feedback');
468
+ const diagnosis = document.getElementById('lesson1-quiz-diagnosis');
652
469
  if (!selected) {
653
- feedback.className = 'quiz-feedback incorrect';
654
- feedback.textContent = 'Please select an answer before checking.';
470
+ diagnosis.className = 'quiz-diagnosis incorrect';
471
+ diagnosis.textContent = 'Please select an answer before checking.';
655
472
  return;
656
473
  }
657
- if (selected.value === 'c') {
658
- feedback.className = 'quiz-feedback correct';
659
- feedback.textContent =
660
- 'Correct. The raw HTML does not contain the newsletter form it is injected by JavaScript after page load. Without the form in its input, the AI agent had no violations to find.';
661
- } else {
662
- feedback.className = 'quiz-feedback incorrect';
663
- feedback.textContent =
664
- 'Not quite. The newsletter form is injected by JavaScript and is absent from the raw HTML source. The AI agent could not find what was not in the content it received.';
474
+ if (selected.value === 'b') {
475
+ diagnosis.className = 'quiz-diagnosis correct';
476
+ diagnosis.textContent =
477
+ 'Correct. AI agents can make answers up when they cannot get the information they need for a correct answer.';
478
+ }
479
+ else if (selected.value === 'a') {
480
+ diagnosis.className = 'quiz-diagnosis incorrect';
481
+ diagnosis.textContent =
482
+ 'Actually, AI agents have trouble examining live web pages, so if you supply a URL to test the agent may resort to a completely fabricated answer.';
483
+ }
484
+ else if (selected.value === 'c') {
485
+ diagnosis.className = 'quiz-diagnosis incorrect';
486
+ diagnosis.textContent =
487
+ 'Actually, hallucinated answers are usually quite believable.';
488
+ }
489
+ else if (selected.value === 'd') {
490
+ diagnosis.className = 'quiz-diagnosis incorrect';
491
+ diagnosis.textContent =
492
+ 'Actually, while there are efforts to prevent hallucination, it is still rampant.';
493
+ }
494
+ });
495
+ // ── Knowledge check (Lesson 2) ──────────────────────────────
496
+ document.getElementById('lesson2-quiz').addEventListener('submit', function (e) {
497
+ e.preventDefault();
498
+ const selected = this.querySelector('input[name="q2"]:checked');
499
+ const diagnosis = document.getElementById('lesson2-quiz-diagnosis');
500
+ if (!selected) {
501
+ diagnosis.className = 'quiz-diagnosis incorrect';
502
+ diagnosis.textContent = 'Please select an answer before checking.';
503
+ return;
504
+ }
505
+ if (selected.value === 'a') {
506
+ diagnosis.className = 'quiz-diagnosis correct';
507
+ diagnosis.textContent =
508
+ 'Correct. It says that human testing, which is the most expensive type, will be more efficient and more effective if defects discovered by rule engines and AI agents have already been corrected.';
509
+ }
510
+ else if (selected.value === 'b') {
511
+ diagnosis.className = 'quiz-diagnosis incorrect';
512
+ diagnosis.textContent =
513
+ 'Actually, it recommends correcting the defects found by rule engines and AI agents first, and only then beginning the human testing.';
514
+ }
515
+ else if (selected.value === 'c') {
516
+ diagnosis.className = 'quiz-diagnosis incorrect';
517
+ diagnosis.textContent =
518
+ 'Actually, it recommends that human testing be last, not first.';
519
+ }
520
+ else if (selected.value === 'd') {
521
+ diagnosis.className = 'quiz-diagnosis incorrect';
522
+ diagnosis.textContent =
523
+ 'Actually, it recommends doing AI testing before, not after, human testing.';
524
+ }
525
+ });
526
+ // ── Knowledge check (Lesson 3) ──────────────────────────────
527
+ document.getElementById('lesson3-quiz').addEventListener('submit', function (e) {
528
+ e.preventDefault();
529
+ const selected = this.querySelector('input[name="q3"]:checked');
530
+ const diagnosis = document.getElementById('lesson3-quiz-diagnosis');
531
+ if (!selected) {
532
+ diagnosis.className = 'quiz-diagnosis incorrect';
533
+ diagnosis.textContent = 'Please select an answer before checking.';
534
+ return;
535
+ }
536
+ if (selected.value === 'b') {
537
+ diagnosis.className = 'quiz-diagnosis correct';
538
+ diagnosis.textContent =
539
+ 'Correct. The autocomplete attribute is an attribute of an HTML element.';
540
+ }
541
+ else if (selected.value === 'a') {
542
+ diagnosis.className = 'quiz-diagnosis incorrect';
543
+ diagnosis.textContent =
544
+ 'Actually, HTTP gets the page, but the issue in this practicum occurs within the page.';
545
+ }
546
+ else if (selected.value === 'c') {
547
+ diagnosis.className = 'quiz-diagnosis incorrect';
548
+ diagnosis.textContent =
549
+ 'Actually, the issue is not a style issue. A style of "display: none" could render the issue moot, but the issue comes from errors in the code of the page itself.';
550
+ }
551
+ else if (selected.value === 'd') {
552
+ diagnosis.className = 'quiz-diagnosis incorrect';
553
+ diagnosis.textContent =
554
+ 'Actually, this is not a script issue. A script might have created the erroneous code, but the issue is with the code of the page.';
555
+ }
556
+ });
557
+ // ── Knowledge check (Lesson 4) ──────────────────────────────
558
+ document.getElementById('lesson4-quiz').addEventListener('submit', function (e) {
559
+ e.preventDefault();
560
+ const selected = this.querySelector('input[name="q4"]:checked');
561
+ const diagnosis = document.getElementById('lesson4-quiz-diagnosis');
562
+ if (!selected) {
563
+ diagnosis.className = 'quiz-diagnosis incorrect';
564
+ diagnosis.textContent = 'Please select an answer before checking.';
565
+ return;
566
+ }
567
+ if (selected.value === 'd') {
568
+ diagnosis.className = 'quiz-diagnosis correct';
569
+ diagnosis.textContent =
570
+ 'Correct. Two rule engines discovered one defect each, but no rule engine discovered them both.';
571
+ }
572
+ else if (selected.value === 'a') {
573
+ diagnosis.className = 'quiz-diagnosis incorrect';
574
+ diagnosis.textContent =
575
+ 'Actually, four of the rule engines did not even have rules about autocomplete attributes.';
576
+ }
577
+ else if (selected.value === 'b') {
578
+ diagnosis.className = 'quiz-diagnosis incorrect';
579
+ diagnosis.textContent =
580
+ 'Actually, six rule engines contained rules about autocomplete attributes, but four of those did not discover either defect.';
581
+ }
582
+ else if (selected.value === 'c') {
583
+ diagnosis.className = 'quiz-diagnosis incorrect';
584
+ diagnosis.textContent =
585
+ 'Actually, you correctly identified the two rule engines that discovered defects, but each of them discovered only one of the two defects.';
665
586
  }
666
587
  });
667
-
668
588
  // ── Comment form ─────────────────────────────────────────────
669
589
  const commentText = document.getElementById('comment-text');
670
590
  const charRemaining = document.getElementById('char-remaining');
671
591
  const charCountMsg = document.getElementById('char-count-msg');
672
- const commentFeedback = document.getElementById('comment-feedback');
592
+ const commentFeedback = document.getElementById('comment-diagnosis');
673
593
  const commentSubmit = document.getElementById('comment-submit');
674
594
 
675
595
  commentText.addEventListener('input', function () {
@@ -681,10 +601,10 @@
681
601
  document.getElementById('comment-form').addEventListener('submit', async function (e) {
682
602
  e.preventDefault();
683
603
  const content = commentText.value.trim();
684
- commentFeedback.className = 'comment-feedback';
604
+ commentFeedback.className = 'comment-diagnosis';
685
605
  commentFeedback.textContent = '';
686
606
  if (!content) {
687
- commentFeedback.className = 'comment-feedback error';
607
+ commentFeedback.className = 'comment-diagnosis error';
688
608
  commentFeedback.textContent = 'Please enter a suggestion before submitting.';
689
609
  return;
690
610
  }
@@ -697,20 +617,20 @@
697
617
  });
698
618
  const data = await response.json();
699
619
  if (data.status === 'ok') {
700
- commentFeedback.className = 'comment-feedback success';
620
+ commentFeedback.className = 'comment-diagnosis success';
701
621
  commentFeedback.textContent =
702
622
  'Thank you! Your suggestion has been saved and will be reviewed by the tutorial authors.';
703
623
  this.reset();
704
624
  charRemaining.textContent = '500';
705
625
  charCountMsg.className = 'char-count';
706
626
  } else {
707
- commentFeedback.className = 'comment-feedback error';
627
+ commentFeedback.className = 'comment-diagnosis error';
708
628
  commentFeedback.textContent =
709
629
  'Your suggestion could not be saved. Please try again later.';
710
630
  commentSubmit.disabled = false;
711
631
  }
712
632
  } catch (_) {
713
- commentFeedback.className = 'comment-feedback error';
633
+ commentFeedback.className = 'comment-diagnosis error';
714
634
  commentFeedback.textContent =
715
635
  'A connection error occurred. Please check your connection and try again.';
716
636
  commentSubmit.disabled = false;