pulse-code 1.0.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (87) hide show
  1. pulse_code-1.0.1/LICENSE +121 -0
  2. pulse_code-1.0.1/PKG-INFO +249 -0
  3. pulse_code-1.0.1/README.md +232 -0
  4. pulse_code-1.0.1/pyproject.toml +43 -0
  5. pulse_code-1.0.1/setup.cfg +4 -0
  6. pulse_code-1.0.1/src/pulse/__init__.py +1 -0
  7. pulse_code-1.0.1/src/pulse/__main__.py +4 -0
  8. pulse_code-1.0.1/src/pulse/catalog.py +102 -0
  9. pulse_code-1.0.1/src/pulse/cli.py +984 -0
  10. pulse_code-1.0.1/src/pulse/data/catalog.json +1599 -0
  11. pulse_code-1.0.1/src/pulse/data/queries_index.json +328 -0
  12. pulse_code-1.0.1/src/pulse/data/variable_labels.json +1338 -0
  13. pulse_code-1.0.1/src/pulse/llm_builder.py +732 -0
  14. pulse_code-1.0.1/src/pulse/matcher.py +180 -0
  15. pulse_code-1.0.1/src/pulse/queries/aids-cases-by-year-1981-1999-req.xml +178 -0
  16. pulse_code-1.0.1/src/pulse/queries/births-by-year-1995-2002-req.xml +226 -0
  17. pulse_code-1.0.1/src/pulse/queries/births-by-year-2003-2006-req.xml +306 -0
  18. pulse_code-1.0.1/src/pulse/queries/births-by-year-2007-2024-req.xml +334 -0
  19. pulse_code-1.0.1/src/pulse/queries/cancer-incidence-by-site-by-year-1999-2022-req.xml +174 -0
  20. pulse_code-1.0.1/src/pulse/queries/cancer-mortality-by-site-by-year-2018-2023-req.xml +166 -0
  21. pulse_code-1.0.1/src/pulse/queries/covid-deaths-by-race-2020-2023-req.xml +529 -0
  22. pulse_code-1.0.1/src/pulse/queries/drug-deaths-by-month-1999-2020-req.xml +436 -0
  23. pulse_code-1.0.1/src/pulse/queries/drug-deaths-by-month-2018-2024-req.xml +544 -0
  24. pulse_code-1.0.1/src/pulse/queries/drug-deaths-by-year-1999-2020-req.xml +436 -0
  25. pulse_code-1.0.1/src/pulse/queries/drug-deaths-by-year-2018-2024-req.xml +536 -0
  26. pulse_code-1.0.1/src/pulse/queries/fentanyl-deaths-by-month-1999-2020-req.xml +430 -0
  27. pulse_code-1.0.1/src/pulse/queries/fentanyl-deaths-by-month-2018-2024-req.xml +530 -0
  28. pulse_code-1.0.1/src/pulse/queries/fetal-deaths-by-cause-by-year-2014-2024-req.xml +530 -0
  29. pulse_code-1.0.1/src/pulse/queries/fetal-deaths-by-year-2005-2024-req.xml +322 -0
  30. pulse_code-1.0.1/src/pulse/queries/heart-vs-cancer-by-sex-2018-2023-req.xml +532 -0
  31. pulse_code-1.0.1/src/pulse/queries/heat-wave-days-by-county-req.xml +154 -0
  32. pulse_code-1.0.1/src/pulse/queries/infant-mortality-2018-2023-req.xml +531 -0
  33. pulse_code-1.0.1/src/pulse/queries/infant-mortality-by-cause-by-year-2007-2023-req.xml +290 -0
  34. pulse_code-1.0.1/src/pulse/queries/maternal-mortality-by-year-1999-2020-req.xml +351 -0
  35. pulse_code-1.0.1/src/pulse/queries/maternal-mortality-by-year-2018-2024-req.xml +413 -0
  36. pulse_code-1.0.1/src/pulse/queries/mortality-by-race-sex-2018-2023-req.xml +490 -0
  37. pulse_code-1.0.1/src/pulse/queries/mortality-by-year-cause-1979-1998-req.xml +222 -0
  38. pulse_code-1.0.1/src/pulse/queries/mortality-by-year-cause-1999-2020-req.xml +434 -0
  39. pulse_code-1.0.1/src/pulse/queries/mortality-by-year-cause-2021-2024-req.xml +529 -0
  40. pulse_code-1.0.1/src/pulse/queries/opioid-overdose-deaths-2018-2024-req.xml +544 -0
  41. pulse_code-1.0.1/src/pulse/queries/pm25-by-year-2003-2011-req.xml +194 -0
  42. pulse_code-1.0.1/src/pulse/queries/provisional-births-by-month-2023-req.xml +854 -0
  43. pulse_code-1.0.1/src/pulse/queries/racial-mortality-gap-2018-2023-req.xml +531 -0
  44. pulse_code-1.0.1/src/pulse/queries/std-cases-by-disease-by-year-1984-2014-req.xml +178 -0
  45. pulse_code-1.0.1/src/pulse/queries/suicide-by-sex-1999-2020-req.xml +411 -0
  46. pulse_code-1.0.1/src/pulse/queries/suicide-by-sex-2021-2024-req.xml +551 -0
  47. pulse_code-1.0.1/src/pulse/queries/tb-cases-by-year-1993-2023-req.xml +206 -0
  48. pulse_code-1.0.1/src/pulse/queries/tick-borne-diseases-by-year-2016-2023-req.xml +125 -0
  49. pulse_code-1.0.1/src/pulse/queries/underlying-cause-mortality-by-year-1999-2020-req.xml +350 -0
  50. pulse_code-1.0.1/src/pulse/queries/unintentional-injuries-by-age-2018-2023-req.xml +531 -0
  51. pulse_code-1.0.1/src/pulse/templates/D10-base.xml +226 -0
  52. pulse_code-1.0.1/src/pulse/templates/D104-base.xml +142 -0
  53. pulse_code-1.0.1/src/pulse/templates/D117-base.xml +110 -0
  54. pulse_code-1.0.1/src/pulse/templates/D128-base.xml +182 -0
  55. pulse_code-1.0.1/src/pulse/templates/D140-base.xml +318 -0
  56. pulse_code-1.0.1/src/pulse/templates/D141-base.xml +454 -0
  57. pulse_code-1.0.1/src/pulse/templates/D149-base.xml +878 -0
  58. pulse_code-1.0.1/src/pulse/templates/D157-base.xml +490 -0
  59. pulse_code-1.0.1/src/pulse/templates/D158-base.xml +406 -0
  60. pulse_code-1.0.1/src/pulse/templates/D159-base.xml +774 -0
  61. pulse_code-1.0.1/src/pulse/templates/D16-base.xml +266 -0
  62. pulse_code-1.0.1/src/pulse/templates/D176-base.xml +526 -0
  63. pulse_code-1.0.1/src/pulse/templates/D178-base.xml +158 -0
  64. pulse_code-1.0.1/src/pulse/templates/D18-base.xml +262 -0
  65. pulse_code-1.0.1/src/pulse/templates/D192-base.xml +854 -0
  66. pulse_code-1.0.1/src/pulse/templates/D204-base.xml +142 -0
  67. pulse_code-1.0.1/src/pulse/templates/D23-base.xml +258 -0
  68. pulse_code-1.0.1/src/pulse/templates/D27-base.xml +342 -0
  69. pulse_code-1.0.1/src/pulse/templates/D31-base.xml +262 -0
  70. pulse_code-1.0.1/src/pulse/templates/D60-base.xml +274 -0
  71. pulse_code-1.0.1/src/pulse/templates/D61-base.xml +250 -0
  72. pulse_code-1.0.1/src/pulse/templates/D66-base.xml +378 -0
  73. pulse_code-1.0.1/src/pulse/templates/D69-base.xml +278 -0
  74. pulse_code-1.0.1/src/pulse/templates/D73-base.xml +182 -0
  75. pulse_code-1.0.1/src/pulse/templates/D74-base.xml +254 -0
  76. pulse_code-1.0.1/src/pulse/templates/D76-base.xml +350 -0
  77. pulse_code-1.0.1/src/pulse/templates/D77-base.xml +434 -0
  78. pulse_code-1.0.1/src/pulse/templates/D8-base.xml +314 -0
  79. pulse_code-1.0.1/src/pulse/templates/D80-base.xml +174 -0
  80. pulse_code-1.0.1/src/pulse/templates/D81-base.xml +178 -0
  81. pulse_code-1.0.1/src/pulse/wonder_client.py +161 -0
  82. pulse_code-1.0.1/src/pulse_code.egg-info/PKG-INFO +249 -0
  83. pulse_code-1.0.1/src/pulse_code.egg-info/SOURCES.txt +85 -0
  84. pulse_code-1.0.1/src/pulse_code.egg-info/dependency_links.txt +1 -0
  85. pulse_code-1.0.1/src/pulse_code.egg-info/entry_points.txt +2 -0
  86. pulse_code-1.0.1/src/pulse_code.egg-info/requires.txt +8 -0
  87. pulse_code-1.0.1/src/pulse_code.egg-info/top_level.txt +1 -0
@@ -0,0 +1,121 @@
1
+ Creative Commons Legal Code
2
+
3
+ CC0 1.0 Universal
4
+
5
+ CREATIVE COMMONS CORPORATION IS NOT A LAW FIRM AND DOES NOT PROVIDE
6
+ LEGAL SERVICES. DISTRIBUTION OF THIS DOCUMENT DOES NOT CREATE AN
7
+ ATTORNEY-CLIENT RELATIONSHIP. CREATIVE COMMONS PROVIDES THIS
8
+ INFORMATION ON AN "AS-IS" BASIS. CREATIVE COMMONS MAKES NO WARRANTIES
9
+ REGARDING THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS
10
+ PROVIDED HEREUNDER, AND DISCLAIMS LIABILITY FOR DAMAGES RESULTING FROM
11
+ THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS PROVIDED
12
+ HEREUNDER.
13
+
14
+ Statement of Purpose
15
+
16
+ The laws of most jurisdictions throughout the world automatically confer
17
+ exclusive Copyright and Related Rights (defined below) upon the creator
18
+ and subsequent owner(s) (each and all, an "owner") of an original work of
19
+ authorship and/or a database (each, a "Work").
20
+
21
+ Certain owners wish to permanently relinquish those rights to a Work for
22
+ the purpose of contributing to a commons of creative, cultural and
23
+ scientific works ("Commons") that the public can reliably and without fear
24
+ of later claims of infringement build upon, modify, incorporate in other
25
+ works, reuse and redistribute as freely as possible in any form whatsoever
26
+ and for any purposes, including without limitation commercial purposes.
27
+ These owners may contribute to the Commons to promote the ideal of a free
28
+ culture and the further production of creative, cultural and scientific
29
+ works, or to gain reputation or greater distribution for their Work in
30
+ part through the use and efforts of others.
31
+
32
+ For these and/or other purposes and motivations, and without any
33
+ expectation of additional consideration or compensation, the person
34
+ associating CC0 with a Work (the "Affirmer"), to the extent that he or she
35
+ is an owner of Copyright and Related Rights in the Work, voluntarily
36
+ elects to apply CC0 to the Work and publicly distribute the Work under its
37
+ terms, with knowledge of his or her Copyright and Related Rights in the
38
+ Work and the meaning and intended legal effect of CC0 on those rights.
39
+
40
+ 1. Copyright and Related Rights. A Work made available under CC0 may be
41
+ protected by copyright and related or neighboring rights ("Copyright and
42
+ Related Rights"). Copyright and Related Rights include, but are not
43
+ limited to, the following:
44
+
45
+ i. the right to reproduce, adapt, distribute, perform, display,
46
+ communicate, and translate a Work;
47
+ ii. moral rights retained by the original author(s) and/or performer(s);
48
+ iii. publicity and privacy rights pertaining to a person's image or
49
+ likeness depicted in a Work;
50
+ iv. rights protecting against unfair competition in regards to a Work,
51
+ subject to the limitations in paragraph 4(a), below;
52
+ v. rights protecting the extraction, dissemination, use and reuse of data
53
+ in a Work;
54
+ vi. database rights (such as those arising under Directive 96/9/EC of the
55
+ European Parliament and of the Council of 11 March 1996 on the legal
56
+ protection of databases, and under any national implementation
57
+ thereof, including any amended or successor version of such
58
+ directive); and
59
+ vii. other similar, equivalent or corresponding rights throughout the
60
+ world based on applicable law or treaty, and any national
61
+ implementations thereof.
62
+
63
+ 2. Waiver. To the greatest extent permitted by, but not in contravention
64
+ of, applicable law, Affirmer hereby overtly, fully, permanently,
65
+ irrevocably and unconditionally waives, abandons, and surrenders all of
66
+ Affirmer's Copyright and Related Rights and associated claims and causes
67
+ of action, whether now known or unknown (including existing as well as
68
+ future claims and causes of action), in the Work (i) in all territories
69
+ worldwide, (ii) for the maximum duration provided by applicable law or
70
+ treaty (including future time extensions), (iii) in any current or future
71
+ medium and for any number of copies, and (iv) for any purpose whatsoever,
72
+ including without limitation commercial, advertising or promotional
73
+ purposes (the "Waiver"). Affirmer makes the Waiver for the benefit of each
74
+ member of the public at large and to the detriment of Affirmer's heirs and
75
+ successors, fully intending that such Waiver shall not be subject to
76
+ revocation, rescission, cancellation, termination, or any other legal or
77
+ equitable action to disrupt the quiet enjoyment of the Work by the public
78
+ as contemplated by Affirmer's express Statement of Purpose.
79
+
80
+ 3. Public License Fallback. Should any part of the Waiver for any reason
81
+ be judged legally invalid or ineffective under applicable law, then the
82
+ Waiver shall be preserved to the maximum extent permitted taking into
83
+ account Affirmer's express Statement of Purpose. In addition, to the
84
+ extent the Waiver is so judged Affirmer hereby grants to each affected
85
+ person a royalty-free, non transferable, non sublicensable, non exclusive,
86
+ irrevocable and unconditional license to exercise Affirmer's Copyright and
87
+ Related Rights in the Work (i) in all territories worldwide, (ii) for the
88
+ maximum duration provided by applicable law or treaty (including future
89
+ time extensions), (iii) in any current or future medium and for any number
90
+ of copies, and (iv) for any purpose whatsoever, including without
91
+ limitation commercial, advertising or promotional purposes (the
92
+ "License"). The License shall be deemed effective as of the date CC0 was
93
+ applied by Affirmer to the Work. Should any part of the License for any
94
+ reason be judged legally invalid or ineffective under applicable law, such
95
+ partial invalidity or ineffectiveness shall not invalidate the remainder
96
+ of the License, and in such case Affirmer hereby affirms that he or she
97
+ will not (i) exercise any of his or her remaining Copyright and Related
98
+ Rights in the Work or (ii) assert any associated claims and causes of
99
+ action with respect to the Work, in either case contrary to Affirmer's
100
+ express Statement of Purpose.
101
+
102
+ 4. Limitations and Disclaimers.
103
+
104
+ a. No trademark or patent rights held by Affirmer are waived, abandoned,
105
+ surrendered, licensed or otherwise affected by this document.
106
+ b. Affirmer offers the Work as-is and makes no representations or
107
+ warranties of any kind concerning the Work, express, implied,
108
+ statutory or otherwise, including without limitation warranties of
109
+ title, merchantability, fitness for a particular purpose, non
110
+ infringement, or the absence of latent or other defects, accuracy, or
111
+ the present or absence of errors, whether or not discoverable, all to
112
+ the greatest extent permissible under applicable law.
113
+ c. Affirmer disclaims responsibility for clearing rights of other persons
114
+ that may apply to the Work or any use thereof, including without
115
+ limitation any person's Copyright and Related Rights in the Work.
116
+ Further, Affirmer disclaims responsibility for obtaining any necessary
117
+ consents, permissions or other rights required for any use of the
118
+ Work.
119
+ d. Affirmer understands and acknowledges that Creative Commons is not a
120
+ party to this document and has no duty or obligation with respect to
121
+ this CC0 or use of the Work.
@@ -0,0 +1,249 @@
1
+ Metadata-Version: 2.4
2
+ Name: pulse-code
3
+ Version: 1.0.1
4
+ Summary: CDC WONDER query CLI — explore, build, and refine public health data queries
5
+ Requires-Python: >=3.14
6
+ Description-Content-Type: text/markdown
7
+ License-File: LICENSE
8
+ Requires-Dist: anthropic==0.115.1
9
+ Requires-Dist: openai==2.44.0
10
+ Requires-Dist: pydantic==2.13.4
11
+ Requires-Dist: python-dotenv==1.2.2
12
+ Requires-Dist: requests==2.34.2
13
+ Requires-Dist: rich==15.0.0
14
+ Requires-Dist: socksio==1.0.0
15
+ Requires-Dist: typer==0.26.8
16
+ Dynamic: license-file
17
+
18
+ # pulse
19
+
20
+ CDC WONDER public health query CLI — explore datasets, run bundled queries, and use Claude to build and refine custom XML queries for public health data that Americans should care about.
21
+
22
+ ## What is this?
23
+
24
+ [CDC WONDER](https://wonder.cdc.gov/) (Wide-ranging ONline Data for Epidemiologic Research) is the government's primary interface for public health statistics: drug overdose deaths, maternal mortality, birth rates, COVID deaths by race, suicide trends, vaccine adverse events, and much more. Its XML API is powerful but opaque.
25
+
26
+ `pulse` makes it usable:
27
+
28
+ - **Explore** all datasets with clear descriptions of what they cover and when
29
+ - **Search** by topic to find the right dataset or a working example query
30
+ - **Run** bundled, validated XML queries directly against the CDC API
31
+ - **Build** new queries from natural language using Claude
32
+ - **Refine** existing queries with conversational feedback
33
+
34
+ ## Setup
35
+
36
+ ```bash
37
+ # Install (requires Python 3.14+)
38
+ uv sync
39
+
40
+ # For build/query/refine/compare/chat commands, set your Anthropic API key:
41
+ export ANTHROPIC_API_KEY=sk-ant-...
42
+ # or put it in a .env file:
43
+ echo "ANTHROPIC_API_KEY=sk-ant-..." > .env
44
+ ```
45
+
46
+ ### LLM provider
47
+
48
+ `pulse` defaults to Anthropic Claude but can also run against an Azure
49
+ OpenAI Foundry deployment (e.g. GPT-5.4). Select the provider with
50
+ `LLM_PROVIDER` (defaults to `anthropic`):
51
+
52
+ ```bash
53
+ # Anthropic (default) — needs ANTHROPIC_API_KEY as above
54
+
55
+ # Azure OpenAI Foundry
56
+ export LLM_PROVIDER=azure_openai
57
+ export AZURE_OPENAI_API_KEY=...
58
+ export AZURE_OPENAI_ENDPOINT=https://<your-resource>.openai.azure.com
59
+ export AZURE_OPENAI_DEPLOYMENT=<your-gpt-5.4-deployment-name>
60
+ export AZURE_OPENAI_API_VERSION=<api-version-your-resource-supports>
61
+ ```
62
+
63
+ All four `AZURE_OPENAI_*` variables are required when `LLM_PROVIDER=azure_openai`;
64
+ `pulse` will tell you which ones are missing. These can also go in a `.env`
65
+ file alongside `ANTHROPIC_API_KEY`.
66
+
67
+ If the LLM endpoint isn't directly reachable — e.g. an Azure OpenAI resource
68
+ with public network access disabled, requiring a private endpoint — bridge
69
+ the connection through a proxy with `LLM_HTTP_PROXY`. Applies to both
70
+ providers, and supports `http://`, `https://`, `socks5://`, and `socks5h://`
71
+ (DNS resolved through the proxy):
72
+
73
+ ```bash
74
+ export LLM_HTTP_PROXY=socks5h://user:pass@host:port
75
+ ```
76
+
77
+ ## Commands
78
+
79
+ ### `pulse datasets` — what's available
80
+
81
+ ```bash
82
+ pulse datasets # all datasets
83
+ pulse datasets --topic Mortality # filter by topic
84
+ pulse datasets --json # JSON output
85
+ ```
86
+
87
+ Shows all 26+ CDC WONDER datasets with: topic, year range, what the data covers, number of bundled example queries, and whether age-adjusted rates are available.
88
+
89
+ **Topics:** Mortality · Infant Mortality · Natality · Environment · Vaccine Safety · Infectious Disease
90
+
91
+ ### `pulse info <ID>` — deep dive on a dataset
92
+
93
+ ```bash
94
+ pulse info D176 # Provisional Mortality (2018–present)
95
+ pulse info D66 # Natality / birth data
96
+ pulse info D8 # VAERS vaccine adverse events
97
+ ```
98
+
99
+ Shows: subject description, available measures, key grouping dimensions, and all bundled example queries for that dataset.
100
+
101
+ ### `pulse search "<topic>"` — find what you need
102
+
103
+ ```bash
104
+ pulse search "opioid overdose deaths by state"
105
+ pulse search "maternal mortality by race"
106
+ pulse search "birth rates 2010 to 2020"
107
+ pulse search "tick-borne disease cases" --queries # queries only
108
+ pulse search "recent COVID deaths" --datasets # datasets only
109
+ ```
110
+
111
+ ### `pulse list-queries` — all bundled example queries
112
+
113
+ ```bash
114
+ pulse list-queries
115
+ pulse list-queries --dataset D176 # filter by dataset
116
+ ```
117
+
118
+ 23 working XML queries covering: drug/opioid/fentanyl deaths, maternal mortality, births, COVID deaths by race, suicide, tick-borne diseases, racial mortality gap, infant mortality, heart disease vs. cancer, and more.
119
+
120
+ ### `pulse run <query>` — execute a query
121
+
122
+ ```bash
123
+ # Run a bundled query by filename (no path needed)
124
+ pulse run drug-deaths-by-year-2018-2024-req.xml
125
+
126
+ # Output formats
127
+ pulse run opioid-overdose-deaths-2018-2024-req.xml -f csv
128
+ pulse run mortality-by-year-cause-2021-2024-req.xml -f json
129
+ pulse run births-by-year-2007-2024-req.xml -f table -o births.csv
130
+
131
+ # Run your own query file
132
+ pulse run /path/to/my-query.xml
133
+ ```
134
+
135
+ Hits the live CDC WONDER API. No login required; CDC requires a ~2-minute cooldown between queries.
136
+
137
+ ### `pulse build "<description>"` — build a query with Claude
138
+
139
+ ```bash
140
+ # Requires ANTHROPIC_API_KEY
141
+ pulse build "drug overdose deaths by state and year 2018-2023"
142
+ pulse build "maternal mortality by race, 2018-2023" -o maternal-race.xml
143
+ pulse build "birth rates by age of mother 2010 to 2024" --no-suggest
144
+ ```
145
+
146
+ Suggests closest existing queries first, then calls Claude to build a new XML query. The LLM selects the right dataset and generates overrides merged onto a validated base template.
147
+
148
+ ### `pulse query "<description>"` — build and run in one step
149
+
150
+ ```bash
151
+ pulse query "fentanyl deaths by state 2020-2024" -f csv
152
+ pulse query "infant mortality by race 2018-2023" --save-xml infant-race.xml
153
+ ```
154
+
155
+ ### `pulse refine <file> "<feedback>"` — iterate on a query
156
+
157
+ ```bash
158
+ pulse refine opioid-overdose-deaths-2018-2024-req.xml "break it down by state"
159
+ pulse refine drug-deaths-by-year-2018-2024-req.xml "add sex breakdown" -o drug-sex.xml
160
+ pulse refine drug-deaths-by-year-2018-2024-req.xml "show monthly not yearly" --run -f csv
161
+ ```
162
+
163
+ ## Testing
164
+
165
+ ```bash
166
+ uv run pytest # unit tests only — fast, no network (default)
167
+ uv run pytest -m integration # + integration tests (see below)
168
+ ```
169
+
170
+ Unit tests cover catalog/matcher lookups, XML template merging (including
171
+ the CDC WONDER radio-button-trap regression), AAR constraints, provider
172
+ selection, and the offline (network-free) CLI commands.
173
+
174
+ Integration tests (`tests/integration/`) are excluded by default and split
175
+ into two kinds:
176
+
177
+ - **`test_socks_proxy_integration.py`** — always runs once integration tests are selected. Spins up a local
178
+ SOCKS5 relay and a local mock LLM HTTP server, so it genuinely exercises
179
+ `LLM_HTTP_PROXY` end-to-end (real SOCKS handshake, real HTTP
180
+ request/response) without needing real Azure/Anthropic credentials.
181
+ - **`test_llm_provider_live.py`** — hits whatever `ANTHROPIC_API_KEY` /
182
+ `LLM_PROVIDER=azure_openai` + `AZURE_OPENAI_*` / `LLM_HTTP_PROXY` you
183
+ actually have configured. Skips if credentials aren't set; also skips
184
+ (rather than fails) if the provider is reachable but blocked at the
185
+ network layer (e.g. an Azure OpenAI resource with public access disabled
186
+ and no working proxy) — that's an environment gap, not a code defect.
187
+
188
+ ## Bundled Datasets (with base templates)
189
+
190
+ | ID | Subject | Years |
191
+ |----|---------|-------|
192
+ | D176 | Provisional mortality — opioids, COVID, suicide, heart disease | 2018–present |
193
+ | D157 | Final mortality, single race (MCD+UCD) | 2018–2023 |
194
+ | D158 | Underlying cause of death, single race — maternal mortality | 2018–2023 |
195
+ | D77 | Multiple cause of death — drug deaths (historical) | 1999–2020 |
196
+ | D76 | Underlying cause of death — suicide, cancer (historical) | 1999–2020 |
197
+ | D141 | MCD with US-Mexico border regions | 1999–2020 |
198
+ | D140 | Compressed mortality ICD-10 | 1999–2016 |
199
+ | D16 | Compressed mortality ICD-9 | 1979–1998 |
200
+ | D74 | Compressed mortality ICD-8 | 1968–1978 |
201
+ | D69 | Linked birth/infant death records | 2007–2023 |
202
+ | D159 | Linked birth/infant death, expanded race | 2017–2023 |
203
+ | D31/D18/D23 | Linked birth/infant death (historical) | 1995–2006 |
204
+ | D66 | Natality — birth rates, birth outcomes | 2007–2024 |
205
+ | D149 | Natality, expanded race detail | 2016–2024 |
206
+ | D192 | Provisional natality (monthly) | 2023–present |
207
+ | D27/D10 | Natality (historical) | 1995–2006 |
208
+ | D8 | VAERS vaccine adverse events | 1990–present |
209
+ | D104 | Heat wave days by county | 1981–2010 |
210
+ | D60/D80/D81 | NLDAS temperature, sunlight, precipitation | 1979–2011 |
211
+ | D73 | PM2.5 fine particulate matter | 2003–2011 |
212
+ | D61 | MODIS land surface temperature | 2003–2008 |
213
+
214
+ ## Public Health Questions You Can Answer
215
+
216
+ - How did opioid overdose deaths trend from 1999 to today, broken down by drug type?
217
+ - What is the racial gap in COVID-19 mortality?
218
+ - How does maternal mortality differ by race and state?
219
+ - Which states have the highest suicide rates by sex?
220
+ - How have birth rates changed by age of mother since 1995?
221
+ - Are tick-borne disease cases increasing?
222
+ - How do PM2.5 air quality levels correlate with where people live?
223
+ - What are the most common adverse events reported after COVID vaccines?
224
+
225
+ ## Releasing
226
+
227
+ Releases are cut by pushing a tag: steps 1–2 below are manual, and `publish.yml`
228
+ (single workflow, one run per tag) handles the rest as three sequential jobs (steps 3–5):
229
+
230
+ 1. Bump `version` in `pyproject.toml`, commit it.
231
+ 2. `git tag vX.Y.Z && git push origin vX.Y.Z`
232
+ 3. **`build`** builds the sdist/wheel, failing fast if the tag doesn't match
233
+ `pyproject.toml`'s version.
234
+ 4. **`release`** (needs `build`) creates the GitHub Release with the built
235
+ artifacts attached — the source of truth for what shipped.
236
+ 5. **`publish`** (needs `release`) publishes those same artifacts to PyPI
237
+ (`pulse-code`) via trusted publishing (OIDC) against the `prod`
238
+ environment — no API tokens stored in the repo.
239
+
240
+ The `needs:` chain means a failure at any step blocks everything after it —
241
+ e.g. a PyPI hiccup can't leave a GitHub Release around for a package that
242
+ isn't actually installable. If the `publish` job fails after `release`
243
+ succeeds, use "Re-run failed jobs" on that workflow run rather than
244
+ re-tagging. PyPI publishing is immutable: once a version is published it
245
+ can't be re-uploaded, so a bad release means bumping to a new version.
246
+
247
+ ## Based On
248
+
249
+ Built using [fartbagxp/health](https://github.com/fartbagxp/health) as reference — a comprehensive collection of CDC data pipelines and the CDC WONDER XML API client and LLM query builder this tool builds on.
@@ -0,0 +1,232 @@
1
+ # pulse
2
+
3
+ CDC WONDER public health query CLI — explore datasets, run bundled queries, and use Claude to build and refine custom XML queries for public health data that Americans should care about.
4
+
5
+ ## What is this?
6
+
7
+ [CDC WONDER](https://wonder.cdc.gov/) (Wide-ranging ONline Data for Epidemiologic Research) is the government's primary interface for public health statistics: drug overdose deaths, maternal mortality, birth rates, COVID deaths by race, suicide trends, vaccine adverse events, and much more. Its XML API is powerful but opaque.
8
+
9
+ `pulse` makes it usable:
10
+
11
+ - **Explore** all datasets with clear descriptions of what they cover and when
12
+ - **Search** by topic to find the right dataset or a working example query
13
+ - **Run** bundled, validated XML queries directly against the CDC API
14
+ - **Build** new queries from natural language using Claude
15
+ - **Refine** existing queries with conversational feedback
16
+
17
+ ## Setup
18
+
19
+ ```bash
20
+ # Install (requires Python 3.14+)
21
+ uv sync
22
+
23
+ # For build/query/refine/compare/chat commands, set your Anthropic API key:
24
+ export ANTHROPIC_API_KEY=sk-ant-...
25
+ # or put it in a .env file:
26
+ echo "ANTHROPIC_API_KEY=sk-ant-..." > .env
27
+ ```
28
+
29
+ ### LLM provider
30
+
31
+ `pulse` defaults to Anthropic Claude but can also run against an Azure
32
+ OpenAI Foundry deployment (e.g. GPT-5.4). Select the provider with
33
+ `LLM_PROVIDER` (defaults to `anthropic`):
34
+
35
+ ```bash
36
+ # Anthropic (default) — needs ANTHROPIC_API_KEY as above
37
+
38
+ # Azure OpenAI Foundry
39
+ export LLM_PROVIDER=azure_openai
40
+ export AZURE_OPENAI_API_KEY=...
41
+ export AZURE_OPENAI_ENDPOINT=https://<your-resource>.openai.azure.com
42
+ export AZURE_OPENAI_DEPLOYMENT=<your-gpt-5.4-deployment-name>
43
+ export AZURE_OPENAI_API_VERSION=<api-version-your-resource-supports>
44
+ ```
45
+
46
+ All four `AZURE_OPENAI_*` variables are required when `LLM_PROVIDER=azure_openai`;
47
+ `pulse` will tell you which ones are missing. These can also go in a `.env`
48
+ file alongside `ANTHROPIC_API_KEY`.
49
+
50
+ If the LLM endpoint isn't directly reachable — e.g. an Azure OpenAI resource
51
+ with public network access disabled, requiring a private endpoint — bridge
52
+ the connection through a proxy with `LLM_HTTP_PROXY`. Applies to both
53
+ providers, and supports `http://`, `https://`, `socks5://`, and `socks5h://`
54
+ (DNS resolved through the proxy):
55
+
56
+ ```bash
57
+ export LLM_HTTP_PROXY=socks5h://user:pass@host:port
58
+ ```
59
+
60
+ ## Commands
61
+
62
+ ### `pulse datasets` — what's available
63
+
64
+ ```bash
65
+ pulse datasets # all datasets
66
+ pulse datasets --topic Mortality # filter by topic
67
+ pulse datasets --json # JSON output
68
+ ```
69
+
70
+ Shows all 26+ CDC WONDER datasets with: topic, year range, what the data covers, number of bundled example queries, and whether age-adjusted rates are available.
71
+
72
+ **Topics:** Mortality · Infant Mortality · Natality · Environment · Vaccine Safety · Infectious Disease
73
+
74
+ ### `pulse info <ID>` — deep dive on a dataset
75
+
76
+ ```bash
77
+ pulse info D176 # Provisional Mortality (2018–present)
78
+ pulse info D66 # Natality / birth data
79
+ pulse info D8 # VAERS vaccine adverse events
80
+ ```
81
+
82
+ Shows: subject description, available measures, key grouping dimensions, and all bundled example queries for that dataset.
83
+
84
+ ### `pulse search "<topic>"` — find what you need
85
+
86
+ ```bash
87
+ pulse search "opioid overdose deaths by state"
88
+ pulse search "maternal mortality by race"
89
+ pulse search "birth rates 2010 to 2020"
90
+ pulse search "tick-borne disease cases" --queries # queries only
91
+ pulse search "recent COVID deaths" --datasets # datasets only
92
+ ```
93
+
94
+ ### `pulse list-queries` — all bundled example queries
95
+
96
+ ```bash
97
+ pulse list-queries
98
+ pulse list-queries --dataset D176 # filter by dataset
99
+ ```
100
+
101
+ 23 working XML queries covering: drug/opioid/fentanyl deaths, maternal mortality, births, COVID deaths by race, suicide, tick-borne diseases, racial mortality gap, infant mortality, heart disease vs. cancer, and more.
102
+
103
+ ### `pulse run <query>` — execute a query
104
+
105
+ ```bash
106
+ # Run a bundled query by filename (no path needed)
107
+ pulse run drug-deaths-by-year-2018-2024-req.xml
108
+
109
+ # Output formats
110
+ pulse run opioid-overdose-deaths-2018-2024-req.xml -f csv
111
+ pulse run mortality-by-year-cause-2021-2024-req.xml -f json
112
+ pulse run births-by-year-2007-2024-req.xml -f table -o births.csv
113
+
114
+ # Run your own query file
115
+ pulse run /path/to/my-query.xml
116
+ ```
117
+
118
+ Hits the live CDC WONDER API. No login required; CDC requires a ~2-minute cooldown between queries.
119
+
120
+ ### `pulse build "<description>"` — build a query with Claude
121
+
122
+ ```bash
123
+ # Requires ANTHROPIC_API_KEY
124
+ pulse build "drug overdose deaths by state and year 2018-2023"
125
+ pulse build "maternal mortality by race, 2018-2023" -o maternal-race.xml
126
+ pulse build "birth rates by age of mother 2010 to 2024" --no-suggest
127
+ ```
128
+
129
+ Suggests closest existing queries first, then calls Claude to build a new XML query. The LLM selects the right dataset and generates overrides merged onto a validated base template.
130
+
131
+ ### `pulse query "<description>"` — build and run in one step
132
+
133
+ ```bash
134
+ pulse query "fentanyl deaths by state 2020-2024" -f csv
135
+ pulse query "infant mortality by race 2018-2023" --save-xml infant-race.xml
136
+ ```
137
+
138
+ ### `pulse refine <file> "<feedback>"` — iterate on a query
139
+
140
+ ```bash
141
+ pulse refine opioid-overdose-deaths-2018-2024-req.xml "break it down by state"
142
+ pulse refine drug-deaths-by-year-2018-2024-req.xml "add sex breakdown" -o drug-sex.xml
143
+ pulse refine drug-deaths-by-year-2018-2024-req.xml "show monthly not yearly" --run -f csv
144
+ ```
145
+
146
+ ## Testing
147
+
148
+ ```bash
149
+ uv run pytest # unit tests only — fast, no network (default)
150
+ uv run pytest -m integration # + integration tests (see below)
151
+ ```
152
+
153
+ Unit tests cover catalog/matcher lookups, XML template merging (including
154
+ the CDC WONDER radio-button-trap regression), AAR constraints, provider
155
+ selection, and the offline (network-free) CLI commands.
156
+
157
+ Integration tests (`tests/integration/`) are excluded by default and split
158
+ into two kinds:
159
+
160
+ - **`test_socks_proxy_integration.py`** — always runs under `-m integration`. Spins up a local
161
+ SOCKS5 relay and a local mock LLM HTTP server, so it genuinely exercises
162
+ `LLM_HTTP_PROXY` end-to-end (real SOCKS handshake, real HTTP
163
+ request/response) without needing real Azure/Anthropic credentials.
164
+ - **`test_llm_provider_live.py`** — hits whatever `ANTHROPIC_API_KEY` /
165
+ `LLM_PROVIDER=azure_openai` + `AZURE_OPENAI_*` / `LLM_HTTP_PROXY` you
166
+ actually have configured. Skips if credentials aren't set; also skips
167
+ (rather than fails) if the provider is reachable but blocked at the
168
+ network layer (e.g. an Azure OpenAI resource with public access disabled
169
+ and no working proxy) — that's an environment gap, not a code defect.
170
+
171
+ ## Bundled Datasets (with base templates)
172
+
173
+ | ID | Subject | Years |
174
+ |----|---------|-------|
175
+ | D176 | Provisional mortality — opioids, COVID, suicide, heart disease | 2018–present |
176
+ | D157 | Final mortality, single race (MCD+UCD) | 2018–2023 |
177
+ | D158 | Underlying cause of death, single race — maternal mortality | 2018–2023 |
178
+ | D77 | Multiple cause of death — drug deaths (historical) | 1999–2020 |
179
+ | D76 | Underlying cause of death — suicide, cancer (historical) | 1999–2020 |
180
+ | D141 | MCD with US-Mexico border regions | 1999–2020 |
181
+ | D140 | Compressed mortality ICD-10 | 1999–2016 |
182
+ | D16 | Compressed mortality ICD-9 | 1979–1998 |
183
+ | D74 | Compressed mortality ICD-8 | 1968–1978 |
184
+ | D69 | Linked birth/infant death records | 2007–2023 |
185
+ | D159 | Linked birth/infant death, expanded race | 2017–2023 |
186
+ | D31/D18/D23 | Linked birth/infant death (historical) | 1995–2006 |
187
+ | D66 | Natality — birth rates, birth outcomes | 2007–2024 |
188
+ | D149 | Natality, expanded race detail | 2016–2024 |
189
+ | D192 | Provisional natality (monthly) | 2023–present |
190
+ | D27/D10 | Natality (historical) | 1995–2006 |
191
+ | D8 | VAERS vaccine adverse events | 1990–present |
192
+ | D104 | Heat wave days by county | 1981–2010 |
193
+ | D60/D80/D81 | NLDAS temperature, sunlight, precipitation | 1979–2011 |
194
+ | D73 | PM2.5 fine particulate matter | 2003–2011 |
195
+ | D61 | MODIS land surface temperature | 2003–2008 |
196
+
197
+ ## Public Health Questions You Can Answer
198
+
199
+ - How did opioid overdose deaths trend from 1999 to today, broken down by drug type?
200
+ - What is the racial gap in COVID-19 mortality?
201
+ - How does maternal mortality differ by race and state?
202
+ - Which states have the highest suicide rates by sex?
203
+ - How have birth rates changed by age of mother since 1995?
204
+ - Are tick-borne disease cases increasing?
205
+ - How do PM2.5 air quality levels correlate with where people live?
206
+ - What are the most common adverse events reported after COVID vaccines?
207
+
208
+ ## Releasing
209
+
210
+ Releases are cut by pushing a tag (steps 1–2, done by hand). `publish.yml` (single workflow, one run
211
+ per tag) then handles the rest as three sequential jobs (steps 3–5):
212
+
213
+ 1. Bump `version` in `pyproject.toml`, commit it.
214
+ 2. `git tag vX.Y.Z && git push origin vX.Y.Z`
215
+ 3. **`build`** builds the sdist/wheel, failing fast if the tag doesn't match
216
+ `pyproject.toml`'s version.
217
+ 4. **`release`** (needs `build`) creates the GitHub Release with the built
218
+ artifacts attached — the source of truth for what shipped.
219
+ 5. **`publish`** (needs `release`) publishes those same artifacts to PyPI
220
+ (`pulse-code`) via trusted publishing (OIDC) against the `prod`
221
+ environment — no API tokens stored in the repo.
222
+
223
+ The `needs:` chain means a failure at any step blocks everything after it —
224
+ e.g. a failed build can't produce a GitHub Release, and a failed release
225
+ can't publish anything to PyPI. If the `publish` job fails after `release`
226
+ succeeds, use "Re-run failed jobs" on that workflow run rather than
227
+ re-tagging. PyPI publishing is immutable: once a version is published it
228
+ can't be re-uploaded, so a bad release means bumping to a new version.
229
+
230
+ ## Based On
231
+
232
+ Built using [fartbagxp/health](https://github.com/fartbagxp/health) as reference — a comprehensive collection of CDC data pipelines, plus the CDC WONDER XML API client and LLM query builder that this tool builds on.
@@ -0,0 +1,43 @@
1
+ [build-system]
2
+ requires = ["setuptools==82.0.1"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "pulse-code"
7
+ version = "1.0.1"
8
+ description = "CDC WONDER query CLI — explore, build, and refine public health data queries"
9
+ readme = "README.md"
10
+ requires-python = ">=3.14"
11
+ dependencies = [
12
+ "anthropic==0.115.1",
13
+ "openai==2.44.0",
14
+ "pydantic==2.13.4",
15
+ "python-dotenv==1.2.2",
16
+ "requests==2.34.2",
17
+ "rich==15.0.0",
18
+ "socksio==1.0.0",
19
+ "typer==0.26.8",
20
+ ]
21
+
22
+ [dependency-groups]
23
+ dev = [
24
+ "pre-commit==4.6.0",
25
+ "pytest==9.1.1",
26
+ ]
27
+
28
+ [tool.setuptools.packages.find]
29
+ where = ["src"]
30
+
31
+ [tool.setuptools.package-data]
32
+ pulse = ["data/*.json", "templates/*.xml", "queries/*.xml"]
33
+
34
+ [project.scripts]
35
+ pulse = "pulse.cli:app"
36
+
37
+ [tool.pytest.ini_options]
38
+ testpaths = ["tests"]
39
+ pythonpath = ["src", "."]
40
+ markers = [
41
+ "integration: exercises real sockets (localhost SOCKS/HTTP servers, or live LLM/CDC WONDER APIs). Excluded by default — run with `-m integration`.",
42
+ ]
43
+ addopts = "-m \"not integration\""
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1 @@
1
+ """pulse — CDC WONDER query CLI for exploring and refining public health data."""
@@ -0,0 +1,4 @@
1
+ from pulse.cli import app
2
+
3
+ if __name__ == "__main__":
4
+ app()