chktm 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- chktm-0.1.0/.claude/settings.local.json +45 -0
- chktm-0.1.0/.gitignore +210 -0
- chktm-0.1.0/AGENTS.md +76 -0
- chktm-0.1.0/BACKLOG.md +61 -0
- chktm-0.1.0/CHANGELOG.md +143 -0
- chktm-0.1.0/CODE_OF_CONDUCT.md +12 -0
- chktm-0.1.0/CONTRIBUTING.md +129 -0
- chktm-0.1.0/Containerfile +59 -0
- chktm-0.1.0/LICENSE +201 -0
- chktm-0.1.0/PKG-INFO +325 -0
- chktm-0.1.0/README.md +308 -0
- chktm-0.1.0/SBOM.md +89 -0
- chktm-0.1.0/SECURITY.md +154 -0
- chktm-0.1.0/SPEC.md +210 -0
- chktm-0.1.0/deploy/openshift/deployment.yaml +75 -0
- chktm-0.1.0/deploy/openshift/init-job.yaml +68 -0
- chktm-0.1.0/deploy/openshift/namespace.yaml +8 -0
- chktm-0.1.0/deploy/openshift/pvc.yaml +17 -0
- chktm-0.1.0/deploy/openshift/route.yaml +20 -0
- chktm-0.1.0/deploy/openshift/secret.yaml +19 -0
- chktm-0.1.0/deploy/openshift/service.yaml +19 -0
- chktm-0.1.0/deploy/openshift/update-cronjob.yaml +68 -0
- chktm-0.1.0/docs/architecture.md +364 -0
- chktm-0.1.0/docs/chktm.1 +196 -0
- chktm-0.1.0/docs/deployment.md +306 -0
- chktm-0.1.0/docs/recon-phase1.md +303 -0
- chktm-0.1.0/docs/testing-mcp.md +254 -0
- chktm-0.1.0/docs/usage-guide.md +627 -0
- chktm-0.1.0/pyproject.toml +41 -0
- chktm-0.1.0/src/chktm/__init__.py +2 -0
- chktm-0.1.0/src/chktm/cli.py +784 -0
- chktm-0.1.0/src/chktm/config.py +79 -0
- chktm-0.1.0/src/chktm/disclaimer.py +10 -0
- chktm-0.1.0/src/chktm/fetch.py +329 -0
- chktm-0.1.0/src/chktm/ingest.py +224 -0
- chktm-0.1.0/src/chktm/mcp_server.py +240 -0
- chktm-0.1.0/src/chktm/pipeline.py +190 -0
- chktm-0.1.0/src/chktm/report.py +797 -0
- chktm-0.1.0/src/chktm/schema.py +120 -0
- chktm-0.1.0/src/chktm/search.py +257 -0
- chktm-0.1.0/src/chktm/web.py +319 -0
- chktm-0.1.0/tests/__init__.py +0 -0
- chktm-0.1.0/tests/fixtures/sample_daily.xml +2259 -0
- chktm-0.1.0/tests/fixtures/sample_edge_cases.xml +152 -0
- chktm-0.1.0/tests/test_ingest.py +134 -0
- chktm-0.1.0/tests/test_report.py +128 -0
- chktm-0.1.0/tests/test_schema.py +66 -0
- chktm-0.1.0/tests/test_search.py +116 -0
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
{
|
|
2
|
+
"permissions": {
|
|
3
|
+
"allow": [
|
|
4
|
+
"WebSearch",
|
|
5
|
+
"WebFetch(domain:data.uspto.gov)",
|
|
6
|
+
"WebFetch(domain:www.uspto.gov)",
|
|
7
|
+
"WebFetch(domain:developer.uspto.gov)",
|
|
8
|
+
"WebFetch(domain:bulkdata.uspto.gov)",
|
|
9
|
+
"Bash(sudo dnf:*)",
|
|
10
|
+
"WebFetch(domain:catalog.data.gov)",
|
|
11
|
+
"Bash(pdftotext \"/home/nickschuetz/.config/claude-code/personal/projects/-home-nickschuetz-code-chktm/f5ff2b24-6468-4643-b31e-1e92f19055ff/tool-results/webfetch-1775849848164-30lf7k.pdf\" -)",
|
|
12
|
+
"Bash(curl -s 'https://data.uspto.gov/api/v1/datasets/products/search?productTitle=Trademark')",
|
|
13
|
+
"Bash(curl -s -H 'Accept: application/json' 'https://data.uspto.gov/ptab-api/search/products?rows=50&start=0&largeTextSearchFlag=N&productTitle=Trademark')",
|
|
14
|
+
"Bash(python3 -m json.tool)",
|
|
15
|
+
"Bash(curl -sv 'https://data.uspto.gov/api/v1/search/products?rows=50&start=0&largeTextSearchFlag=N&productTitle=Trademark')",
|
|
16
|
+
"WebFetch(domain:github.com)",
|
|
17
|
+
"Bash(curl -s -H 'Accept: application/json' -H 'X-API-KEY: dummy' 'https://data.uspto.gov/api/v1/bulk-data/product/TRTDXFAP')",
|
|
18
|
+
"Bash(curl -s -H 'Accept: application/json' 'https://api.uspto.gov/v1/bulk-data/product/TRTDXFAP')",
|
|
19
|
+
"Bash(curl -s -H 'Accept: application/json' 'https://api.uspto.gov/v3/bulk-data/products/TRTDXFAP/files?rows=5&start=0')",
|
|
20
|
+
"Bash(pdftotext \"/home/nickschuetz/.config/claude-code/personal/projects/-home-nickschuetz-code-chktm/f5ff2b24-6468-4643-b31e-1e92f19055ff/tool-results/webfetch-1775850107225-n96rdd.pdf\" -)",
|
|
21
|
+
"Bash(curl -sI 'https://bulkdata.uspto.gov/data/trademark/dailyxml/applications/apc260101.zip')",
|
|
22
|
+
"Bash(curl -s 'https://api.uspto.gov/v3/bulk-data/products?productNameFilter=Trademark' -H 'Accept: application/json')",
|
|
23
|
+
"Bash(curl -sv 'https://bulkdata.uspto.gov/data/trademark/dailyxml/applications/')",
|
|
24
|
+
"Bash(curl -s 'https://api.uspto.gov/api/v1/datasets/products/search?q=Trademark&limit=20&offset=0' -H 'Accept: application/json')",
|
|
25
|
+
"Bash(curl -s -o /tmp/bdss-odp-mapping.pdf 'https://data.uspto.gov/documents/documents/BDSS-to-ODP-API-Mapping.pdf')",
|
|
26
|
+
"Bash(pdftotext /tmp/bdss-odp-mapping.pdf -)",
|
|
27
|
+
"WebFetch(domain:raw.githubusercontent.com)",
|
|
28
|
+
"Bash(curl -s 'https://raw.githubusercontent.com/patent-dev/uspto-odp/main/swagger_fixed.yaml')",
|
|
29
|
+
"Bash(curl -s 'https://raw.githubusercontent.com/patent-dev/uspto-odp/main/client.go')",
|
|
30
|
+
"WebFetch(domain:uspto.report)",
|
|
31
|
+
"Bash(python3 -m pip install -e /home/nickschuetz/code/chktm)",
|
|
32
|
+
"Bash(dnf list:*)",
|
|
33
|
+
"Bash(PYTHONPATH=src python3:*)",
|
|
34
|
+
"Bash(python3 -m ensurepip)",
|
|
35
|
+
"Bash(python3 -m pip install pytest typer rich --quiet)",
|
|
36
|
+
"Bash(python3 -m pip install defusedxml --quiet)",
|
|
37
|
+
"Bash(python3:*)",
|
|
38
|
+
"Bash(echo \"exit: $?\")",
|
|
39
|
+
"Bash(mkdir -p /mnt/d/Storage/claude/chktm)",
|
|
40
|
+
"Bash(mv /home/nickschuetz/code/chktm/data/* /mnt/d/Storage/claude/chktm/)",
|
|
41
|
+
"Bash(rmdir /home/nickschuetz/code/chktm/data)",
|
|
42
|
+
"Bash(PYTHONPATH=src CHKTM_DATA_DIR=/mnt/d/Storage/claude/chktm python3:*)"
|
|
43
|
+
]
|
|
44
|
+
}
|
|
45
|
+
}
|
chktm-0.1.0/.gitignore
ADDED
|
@@ -0,0 +1,210 @@
|
|
|
1
|
+
# chktm data directory (downloaded USPTO bulk data + SQLite DB)
|
|
2
|
+
data/
|
|
3
|
+
|
|
4
|
+
# Byte-compiled / optimized / DLL files
|
|
5
|
+
__pycache__/
|
|
6
|
+
*.py[codz]
|
|
7
|
+
*$py.class
|
|
8
|
+
|
|
9
|
+
# C extensions
|
|
10
|
+
*.so
|
|
11
|
+
|
|
12
|
+
# Distribution / packaging
|
|
13
|
+
.Python
|
|
14
|
+
build/
|
|
15
|
+
develop-eggs/
|
|
16
|
+
dist/
|
|
17
|
+
downloads/
|
|
18
|
+
eggs/
|
|
19
|
+
.eggs/
|
|
20
|
+
lib/
|
|
21
|
+
lib64/
|
|
22
|
+
parts/
|
|
23
|
+
sdist/
|
|
24
|
+
var/
|
|
25
|
+
wheels/
|
|
26
|
+
share/python-wheels/
|
|
27
|
+
*.egg-info/
|
|
28
|
+
.installed.cfg
|
|
29
|
+
*.egg
|
|
30
|
+
MANIFEST
|
|
31
|
+
|
|
32
|
+
# PyInstaller
|
|
33
|
+
# Usually these files are written by a python script from a template
|
|
34
|
+
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
|
35
|
+
*.manifest
|
|
36
|
+
*.spec
|
|
37
|
+
|
|
38
|
+
# Installer logs
|
|
39
|
+
pip-log.txt
|
|
40
|
+
pip-delete-this-directory.txt
|
|
41
|
+
|
|
42
|
+
# Unit test / coverage reports
|
|
43
|
+
htmlcov/
|
|
44
|
+
.tox/
|
|
45
|
+
.nox/
|
|
46
|
+
.coverage
|
|
47
|
+
.coverage.*
|
|
48
|
+
.cache
|
|
49
|
+
nosetests.xml
|
|
50
|
+
coverage.xml
|
|
51
|
+
*.cover
|
|
52
|
+
*.py.cover
|
|
53
|
+
.hypothesis/
|
|
54
|
+
.pytest_cache/
|
|
55
|
+
cover/
|
|
56
|
+
|
|
57
|
+
# Translations
|
|
58
|
+
*.mo
|
|
59
|
+
*.pot
|
|
60
|
+
|
|
61
|
+
# Django stuff:
|
|
62
|
+
*.log
|
|
63
|
+
local_settings.py
|
|
64
|
+
db.sqlite3
|
|
65
|
+
db.sqlite3-journal
|
|
66
|
+
|
|
67
|
+
# Flask stuff:
|
|
68
|
+
instance/
|
|
69
|
+
.webassets-cache
|
|
70
|
+
|
|
71
|
+
# Scrapy stuff:
|
|
72
|
+
.scrapy
|
|
73
|
+
|
|
74
|
+
# Sphinx documentation
|
|
75
|
+
docs/_build/
|
|
76
|
+
|
|
77
|
+
# PyBuilder
|
|
78
|
+
.pybuilder/
|
|
79
|
+
target/
|
|
80
|
+
|
|
81
|
+
# Jupyter Notebook
|
|
82
|
+
.ipynb_checkpoints
|
|
83
|
+
|
|
84
|
+
# IPython
|
|
85
|
+
profile_default/
|
|
86
|
+
ipython_config.py
|
|
87
|
+
|
|
88
|
+
# pyenv
|
|
89
|
+
# For a library or package, you might want to ignore these files since the code is
|
|
90
|
+
# intended to run in multiple environments; otherwise, check them in:
|
|
91
|
+
# .python-version
|
|
92
|
+
|
|
93
|
+
# pipenv
|
|
94
|
+
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
|
|
95
|
+
# However, in case of collaboration, if having platform-specific dependencies or dependencies
|
|
96
|
+
# having no cross-platform support, pipenv may install dependencies that don't work, or not
|
|
97
|
+
# install all needed dependencies.
|
|
98
|
+
#Pipfile.lock
|
|
99
|
+
|
|
100
|
+
# UV
|
|
101
|
+
# Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
|
|
102
|
+
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
|
103
|
+
# commonly ignored for libraries.
|
|
104
|
+
#uv.lock
|
|
105
|
+
|
|
106
|
+
# poetry
|
|
107
|
+
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
|
|
108
|
+
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
|
109
|
+
# commonly ignored for libraries.
|
|
110
|
+
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
|
|
111
|
+
#poetry.lock
|
|
112
|
+
#poetry.toml
|
|
113
|
+
|
|
114
|
+
# pdm
|
|
115
|
+
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
|
|
116
|
+
# pdm recommends including project-wide configuration in pdm.toml, but excluding .pdm-python.
|
|
117
|
+
# https://pdm-project.org/en/latest/usage/project/#working-with-version-control
|
|
118
|
+
#pdm.lock
|
|
119
|
+
#pdm.toml
|
|
120
|
+
.pdm-python
|
|
121
|
+
.pdm-build/
|
|
122
|
+
|
|
123
|
+
# pixi
|
|
124
|
+
# Similar to Pipfile.lock, it is generally recommended to include pixi.lock in version control.
|
|
125
|
+
#pixi.lock
|
|
126
|
+
# Pixi creates a virtual environment in the .pixi directory, just like venv module creates one
|
|
127
|
+
# in the .venv directory. It is recommended not to include this directory in version control.
|
|
128
|
+
.pixi
|
|
129
|
+
|
|
130
|
+
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
|
|
131
|
+
__pypackages__/
|
|
132
|
+
|
|
133
|
+
# Celery stuff
|
|
134
|
+
celerybeat-schedule
|
|
135
|
+
celerybeat.pid
|
|
136
|
+
|
|
137
|
+
# SageMath parsed files
|
|
138
|
+
*.sage.py
|
|
139
|
+
|
|
140
|
+
# Environments
|
|
141
|
+
.env
|
|
142
|
+
.envrc
|
|
143
|
+
.venv
|
|
144
|
+
env/
|
|
145
|
+
venv/
|
|
146
|
+
ENV/
|
|
147
|
+
env.bak/
|
|
148
|
+
venv.bak/
|
|
149
|
+
|
|
150
|
+
# Spyder project settings
|
|
151
|
+
.spyderproject
|
|
152
|
+
.spyproject
|
|
153
|
+
|
|
154
|
+
# Rope project settings
|
|
155
|
+
.ropeproject
|
|
156
|
+
|
|
157
|
+
# mkdocs documentation
|
|
158
|
+
/site
|
|
159
|
+
|
|
160
|
+
# mypy
|
|
161
|
+
.mypy_cache/
|
|
162
|
+
.dmypy.json
|
|
163
|
+
dmypy.json
|
|
164
|
+
|
|
165
|
+
# Pyre type checker
|
|
166
|
+
.pyre/
|
|
167
|
+
|
|
168
|
+
# pytype static type analyzer
|
|
169
|
+
.pytype/
|
|
170
|
+
|
|
171
|
+
# Cython debug symbols
|
|
172
|
+
cython_debug/
|
|
173
|
+
|
|
174
|
+
# PyCharm
|
|
175
|
+
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
|
|
176
|
+
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
|
|
177
|
+
# and can be added to the global gitignore or merged into this file. For a more nuclear
|
|
178
|
+
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
|
|
179
|
+
#.idea/
|
|
180
|
+
|
|
181
|
+
# Abstra
|
|
182
|
+
# Abstra is an AI-powered process automation framework.
|
|
183
|
+
# Ignore directories containing user credentials, local state, and settings.
|
|
184
|
+
# Learn more at https://abstra.io/docs
|
|
185
|
+
.abstra/
|
|
186
|
+
|
|
187
|
+
# Visual Studio Code
|
|
188
|
+
# Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore
|
|
189
|
+
# that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore
|
|
190
|
+
# and can be added to the global gitignore or merged into this file. However, if you prefer,
|
|
191
|
+
# you could uncomment the following to ignore the entire vscode folder
|
|
192
|
+
# .vscode/
|
|
193
|
+
|
|
194
|
+
# Ruff stuff:
|
|
195
|
+
.ruff_cache/
|
|
196
|
+
|
|
197
|
+
# PyPI configuration file
|
|
198
|
+
.pypirc
|
|
199
|
+
|
|
200
|
+
# Cursor
|
|
201
|
+
# Cursor is an AI-powered code editor. `.cursorignore` specifies files/directories to
|
|
202
|
+
# exclude from AI features like autocomplete and code analysis. Recommended for sensitive data
|
|
203
|
+
# refer to https://docs.cursor.com/context/ignore-files
|
|
204
|
+
.cursorignore
|
|
205
|
+
.cursorindexingignore
|
|
206
|
+
|
|
207
|
+
# Marimo
|
|
208
|
+
marimo/_static/
|
|
209
|
+
marimo/_lsp/
|
|
210
|
+
__marimo__/
|
chktm-0.1.0/AGENTS.md
ADDED
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
# AGENTS.md
|
|
2
|
+
|
|
3
|
+
Instructions for AI coding agents working in this repo.
|
|
4
|
+
|
|
5
|
+
## Source of truth
|
|
6
|
+
|
|
7
|
+
`SPEC.md` is the authoritative scope for v0.1. If this file and SPEC.md disagree
|
|
8
|
+
about *what* to build, SPEC.md wins. This file governs *how* to build it.
|
|
9
|
+
|
|
10
|
+
Anything not in SPEC.md's v0.1 scope goes in `BACKLOG.md`, not in the code.
|
|
11
|
+
|
|
12
|
+
## Build in phases, stop between them
|
|
13
|
+
|
|
14
|
+
SPEC.md defines five phases. Treat the stops between phases as hard stops:
|
|
15
|
+
finish the phase, report results to the human, and wait for a go-ahead before
|
|
16
|
+
starting the next one. Do not chain phases together unprompted.
|
|
17
|
+
|
|
18
|
+
Phase 1 in particular is a **recon-only** phase. Do not write application code
|
|
19
|
+
during Phase 1. Verify the live USPTO Open Data Portal (data.uspto.gov) details
|
|
20
|
+
firsthand — URL patterns, file formats, schema, update cadence — and produce a
|
|
21
|
+
short written recon report. The rest of the build depends on this being right.
|
|
22
|
+
|
|
23
|
+
## Time budget
|
|
24
|
+
|
|
25
|
+
Target: 8–12 hours total across all phases.
|
|
26
|
+
Hard stop: 16 hours. If you hit 16 hours and v0.1 is not shipped, stop and
|
|
27
|
+
surface the problem. Do not silently keep going.
|
|
28
|
+
|
|
29
|
+
## Scope discipline
|
|
30
|
+
|
|
31
|
+
- If a change feels like it's growing the scope, stop and ask.
|
|
32
|
+
- "While I'm here" refactors are not free. Skip them unless they're load-bearing
|
|
33
|
+
for the current phase.
|
|
34
|
+
- New dependencies need a one-line justification in the commit message.
|
|
35
|
+
- DuckDB vs SQLite is an open call per SPEC.md — if you pick DuckDB, document
|
|
36
|
+
why in the commit and in `README.md`.
|
|
37
|
+
|
|
38
|
+
## Commits
|
|
39
|
+
|
|
40
|
+
- Sign off every commit: `git commit -s` (DCO, no CLA — see SPEC.md).
|
|
41
|
+
- Conventional-ish messages are fine but not required. Clarity beats format.
|
|
42
|
+
- One logical change per commit. Recon notes, schema, ingest, fetch, search,
|
|
43
|
+
report, docs — these should not all land in one commit.
|
|
44
|
+
- Never commit downloaded USPTO data, the SQLite database, or anything under
|
|
45
|
+
`data/`. Add to `.gitignore` early.
|
|
46
|
+
|
|
47
|
+
## Code conventions
|
|
48
|
+
|
|
49
|
+
- Python 3.11+.
|
|
50
|
+
- `pyproject.toml` with `hatchling` or `setuptools` — your call, document it.
|
|
51
|
+
- Formatter: `ruff format`. Linter: `ruff check`. No black, no flake8, no isort.
|
|
52
|
+
- Type hints on all public functions. `from __future__ import annotations` at
|
|
53
|
+
the top of every module.
|
|
54
|
+
- Apache-2.0 SPDX header at the top of every source file:
|
|
55
|
+
`# SPDX-License-Identifier: Apache-2.0`
|
|
56
|
+
- CLI framework: Typer (per SPEC.md). Don't substitute Click or argparse.
|
|
57
|
+
|
|
58
|
+
## Testing
|
|
59
|
+
|
|
60
|
+
- `pytest`. Fixtures live under `tests/fixtures/`.
|
|
61
|
+
- Every module in `src/chktm/` should have a corresponding `test_*.py`.
|
|
62
|
+
- Tests must not hit the network. If a test needs USPTO data, it uses a
|
|
63
|
+
checked-in fixture XML file under `tests/fixtures/`.
|
|
64
|
+
- `pytest` with no arguments must pass before any phase is considered done.
|
|
65
|
+
|
|
66
|
+
## The disclaimer is load-bearing
|
|
67
|
+
|
|
68
|
+
chktm is a research aid, not legal clearance. That sentence (or a close
|
|
69
|
+
variant) must appear in: `README.md`, `--help` output, every generated report,
|
|
70
|
+
and the top of `src/chktm/disclaimer.py` as the single source of truth that
|
|
71
|
+
the other surfaces import from. Do not paraphrase it into something softer.
|
|
72
|
+
|
|
73
|
+
## When in doubt
|
|
74
|
+
|
|
75
|
+
Stop and ask the human. A two-line clarifying question is cheaper than an
|
|
76
|
+
hour of work in the wrong direction.
|
chktm-0.1.0/BACKLOG.md
ADDED
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
# Backlog
|
|
2
|
+
|
|
3
|
+
Items explicitly deferred from v0.1. These are not bugs — they are conscious
|
|
4
|
+
scope boundaries. Before submitting a PR for any of these items, open an issue for discussion first.
|
|
5
|
+
|
|
6
|
+
## Search improvements
|
|
7
|
+
|
|
8
|
+
- **Fuzzy matching** — Levenshtein distance, n-gram similarity for catching
|
|
9
|
+
near-miss typos (e.g., "thundercorp" vs "thundrcorp")
|
|
10
|
+
- **Phonetic matching** — Soundex, Metaphone, or Double Metaphone for catching
|
|
11
|
+
sound-alike marks (e.g., "Hella" vs "Hela")
|
|
12
|
+
- **Weighting/scoring** — More nuanced risk scoring beyond the current three-tier
|
|
13
|
+
system (exact match weight, prefix match, class distance)
|
|
14
|
+
|
|
15
|
+
## Data sources
|
|
16
|
+
|
|
17
|
+
- **Multi-jurisdiction** — EU (EUIPO), UK (IPO), WIPO Madrid Protocol, Canada
|
|
18
|
+
(CIPO). Each has its own bulk data format.
|
|
19
|
+
- **Common-law usage checks** — Search Steam, itch.io, GitHub, app stores for
|
|
20
|
+
unregistered marks in the same space
|
|
21
|
+
- **TSDR real-time lookup** — Query `tsdrapi.uspto.gov` for individual case
|
|
22
|
+
details (different API, different key)
|
|
23
|
+
- **Official status codes table** — Download and parse
|
|
24
|
+
`Table1TrademarkStatusCodes_20250813.doc` for the definitive live/dead mapping
|
|
25
|
+
instead of the current range-based heuristic
|
|
26
|
+
|
|
27
|
+
## Distribution
|
|
28
|
+
|
|
29
|
+
- **PyPI publishing** — `pip install chktm` instead of `pipx install git+...`
|
|
30
|
+
- **Homebrew formula** — For macOS users
|
|
31
|
+
- **Pre-built container images** — Automated CI/CD to build and push to quay.io
|
|
32
|
+
on tag
|
|
33
|
+
|
|
34
|
+
## Web UI
|
|
35
|
+
|
|
36
|
+
- **Saved searches** — Persist search queries for monitoring over time
|
|
37
|
+
- **Diff reports** — Compare results between runs to surface new conflicts
|
|
38
|
+
- **Authentication** — OAuth proxy or basic auth for the web UI and MCP endpoints
|
|
39
|
+
- **Rate limiting** — Per-client rate limiting on the web API
|
|
40
|
+
|
|
41
|
+
## Infrastructure
|
|
42
|
+
|
|
43
|
+
- **CI/CD pipeline** — GitHub Actions for tests, linting, container builds
|
|
44
|
+
- **Helm chart** — Parameterized OpenShift/Kubernetes deployment
|
|
45
|
+
- **CycloneDX SBOM export** — Machine-readable SBOM in addition to the current
|
|
46
|
+
human-readable `SBOM.md`
|
|
47
|
+
- **Database backend alternatives** — PostgreSQL or DuckDB for multi-replica
|
|
48
|
+
deployments where SQLite's single-writer limitation is a problem
|
|
49
|
+
|
|
50
|
+
## Agent integration
|
|
51
|
+
|
|
52
|
+
- **MCP authentication** — Token-based auth for the MCP endpoints
|
|
53
|
+
- **Batch monitoring** — MCP tool to search a large watchlist of terms on a
|
|
54
|
+
schedule and report only new/changed results since last check
|
|
55
|
+
- **Monitoring tool** — MCP tool to set up alerts when new marks are filed that
|
|
56
|
+
match a watched term
|
|
57
|
+
|
|
58
|
+
## Documentation
|
|
59
|
+
|
|
60
|
+
No outstanding documentation items. The man page at `docs/chktm.1` and the API reference
|
|
61
|
+
at `/docs` (Swagger UI) and `/redoc` (ReDoc) are included in v0.1.
|
chktm-0.1.0/CHANGELOG.md
ADDED
|
@@ -0,0 +1,143 @@
|
|
|
1
|
+
# Changelog
|
|
2
|
+
|
|
3
|
+
All notable changes to this project will be documented in this file.
|
|
4
|
+
|
|
5
|
+
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
|
|
6
|
+
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
|
7
|
+
|
|
8
|
+
## [Unreleased]
|
|
9
|
+
|
|
10
|
+
### Added
|
|
11
|
+
- Project skeleton: `pyproject.toml` (hatchling), `src/chktm/` package layout
|
|
12
|
+
- `schema.py` — SQLite schema (marks, mark_classes, meta tables), text
|
|
13
|
+
normalization, date parsing, status-code live/dead heuristic
|
|
14
|
+
- `ingest.py` — Streaming XML parser using `defusedxml.ElementTree.iterparse`
|
|
15
|
+
with batched upserts and memory-bounded processing
|
|
16
|
+
- `cli.py` — Typer CLI with commands: `init`, `update`, `search`, `status`,
|
|
17
|
+
`version`. All commands support `--json` for structured machine-readable output
|
|
18
|
+
- `disclaimer.py` — Single source of truth for legal disclaimer text
|
|
19
|
+
- `bulk_ingest_mode()` context manager for tuned SQLite writes
|
|
20
|
+
(`synchronous=NORMAL`, 64 MB cache)
|
|
21
|
+
- `CHKTM_DATA_DIR` environment variable support for overriding default data
|
|
22
|
+
directory
|
|
23
|
+
- Stable exit codes: 0 (success), 1 (error), 2 (no database)
|
|
24
|
+
- `fetch.py` — Download module for USPTO ODP API with rate limiting
|
|
25
|
+
(4 ZIP/min), progress callbacks, resumability via file-size checks and
|
|
26
|
+
meta table tracking
|
|
27
|
+
- `search.py` — Query engine with normalized substring matching, class
|
|
28
|
+
filtering, and risk-tier classification (HIGH/MEDIUM/LOW)
|
|
29
|
+
- `report.py` — Markdown and JSON report rendering grouped by risk tier,
|
|
30
|
+
with disclaimer and TSDR links
|
|
31
|
+
- `chktm init` — Full pipeline: download annual backfile + daily files,
|
|
32
|
+
extract, ingest into SQLite, with Rich progress bars
|
|
33
|
+
- `chktm update` — Incremental daily file download and ingest since last
|
|
34
|
+
update date
|
|
35
|
+
- `chktm search` — Fully implemented with `--classes`, `--include-dead`,
|
|
36
|
+
`--out`, and `--json` flags
|
|
37
|
+
- Test suite: 54 tests covering schema utilities, ingest, search, and
|
|
38
|
+
report rendering
|
|
39
|
+
- Test fixtures: `sample_edge_cases.xml` (synthetic), `sample_daily.xml`
|
|
40
|
+
(extracted from USPTO daily file)
|
|
41
|
+
- `web.py` — FastAPI web application with lightweight search UI and REST API
|
|
42
|
+
(`GET /`, `GET /api/status`, `GET /api/search`)
|
|
43
|
+
- `mcp_server.py` — MCP (Model Context Protocol) server with `search_trademarks`
|
|
44
|
+
and `corpus_status` tools. Supports stdio, SSE, and Streamable HTTP transports
|
|
45
|
+
- `chktm serve` — CLI command to start the combined web UI + MCP server
|
|
46
|
+
- `Containerfile` — Multi-stage build for quay.io, runs as non-root (UID 1001),
|
|
47
|
+
PVC mount at `/data` for the SQLite database
|
|
48
|
+
- OpenShift deployment manifests in `deploy/openshift/`:
|
|
49
|
+
Namespace, PVC, Secret, init Job, Deployment, Service, Route, update CronJob
|
|
50
|
+
- `docs/deployment.md` — Full deployment guide for OpenShift with quay.io,
|
|
51
|
+
including MCP client configuration
|
|
52
|
+
- `SBOM.md` — Software Bill of Materials listing all direct and transitive
|
|
53
|
+
dependencies with licenses
|
|
54
|
+
- `docs/architecture.md` — Data flow, module responsibilities, database schema,
|
|
55
|
+
security model, AI agent interface contract
|
|
56
|
+
- `docs/recon-phase1.md` — Phase 1 recon report documenting USPTO ODP API
|
|
57
|
+
findings, XML schema, status codes, rate limits, and gotchas
|
|
58
|
+
- `README.md` — Project overview, quickstart, CLI reference, known limitations
|
|
59
|
+
- `CONTRIBUTING.md` — DCO sign-off, scope philosophy, setup, code style
|
|
60
|
+
- `BACKLOG.md` — Deferred items: fuzzy matching, multi-jurisdiction, PyPI, etc.
|
|
61
|
+
- `CODE_OF_CONDUCT.md` — Contributor Covenant v2.1
|
|
62
|
+
- `SECURITY.md` — Threat model, OWASP Top 10 compliance mapping, vuln reporting
|
|
63
|
+
- `docs/usage-guide.md` — Search best practices, class selection, risk tier
|
|
64
|
+
interpretation, agent efficiency tips for minimizing tokens/round-trips
|
|
65
|
+
- `docs/chktm.1` — Man page covering all commands, options, exit codes,
|
|
66
|
+
environment variables, config files, endpoints, and examples
|
|
67
|
+
- Interactive API reference at `/docs` (Swagger UI) and `/redoc` (ReDoc)
|
|
68
|
+
auto-generated by FastAPI from endpoint definitions
|
|
69
|
+
- `docs/testing-mcp.md` — Step-by-step MCP Inspector testing guide covering
|
|
70
|
+
Streamable HTTP, SSE, stdio, and CLI modes on all platforms
|
|
71
|
+
- Multiplatform documentation: all install, env var, and MCP config examples
|
|
72
|
+
include Linux, macOS, and Windows variants
|
|
73
|
+
- Version centralized in `src/chktm/__init__.py` — single source of truth for
|
|
74
|
+
CLI, web app, API responses, MCP protocol handshake, and pyproject.toml
|
|
75
|
+
- `--report legal` flag on `chktm search` — generates attorney-ready report
|
|
76
|
+
with executive summary, component word analysis (auto-splits compound terms),
|
|
77
|
+
risk assessment grouped by owner, limitations, and recommended next steps
|
|
78
|
+
- `generate_legal_report` MCP tool — same legal report available to AI agents
|
|
79
|
+
- PDF output for legal reports — `--out report.pdf` auto-detected from file
|
|
80
|
+
extension. Professional formatting with tables, color-coded risk tiers,
|
|
81
|
+
section headings, and print-ready layout. Uses `fpdf2` (pure Python, LGPL-3.0)
|
|
82
|
+
- `config.py` — Persistent config file (`~/.config/chktm/config.toml` on
|
|
83
|
+
Linux/macOS, `%APPDATA%\chktm\config.toml` on Windows). Saves data directory
|
|
84
|
+
during init so subsequent commands find the database automatically.
|
|
85
|
+
Resolution order: `--data-dir` flag > `CHKTM_DATA_DIR` env > config > `./data`
|
|
86
|
+
|
|
87
|
+
### Security
|
|
88
|
+
- XML parsing hardened with `defusedxml` to block entity expansion, XXE, and
|
|
89
|
+
DTD retrieval attacks
|
|
90
|
+
- All SQL queries use parameterized statements (`?` placeholders)
|
|
91
|
+
- SQL LIKE wildcards (`%`, `_`) escaped in user search terms to prevent
|
|
92
|
+
unintended pattern matching (OWASP A03:2021)
|
|
93
|
+
- Security headers on all web responses: CSP, X-Content-Type-Options,
|
|
94
|
+
X-Frame-Options, Referrer-Policy, Permissions-Policy (OWASP A05:2021)
|
|
95
|
+
- Input validation: query length (500 chars), term count (20), class count (45),
|
|
96
|
+
per-term length (200 chars), result cap (1,000 per term) (OWASP A04:2021)
|
|
97
|
+
- API key read from environment variable, never logged or committed
|
|
98
|
+
- Filename sanitization on downloads to prevent path traversal (OWASP A04:2021)
|
|
99
|
+
- International class range validation (1-45) on web API (OWASP A04:2021)
|
|
100
|
+
- Generic error messages to clients — internal paths no longer exposed (OWASP A01:2021)
|
|
101
|
+
- `Strict-Transport-Security` (HSTS) header added (OWASP A05:2021)
|
|
102
|
+
- `Cache-Control: no-store` header to prevent caching of search results
|
|
103
|
+
- Container hardened for OpenShift `restricted` SCC: arbitrary UID with GID 0,
|
|
104
|
+
`readOnlyRootFilesystem`, `drop: ALL` capabilities, `runAsNonRoot`,
|
|
105
|
+
`seccompProfile: RuntimeDefault`, `automountServiceAccountToken: false`
|
|
106
|
+
- All OpenShift manifests include pod and container security contexts
|
|
107
|
+
|
|
108
|
+
### Resilience
|
|
109
|
+
- Per-file retry with exponential backoff (2s, 4s, 8s) on network errors,
|
|
110
|
+
HTTP 429 (with Retry-After), and HTTP 5xx server errors
|
|
111
|
+
- Download integrity check: verifies file size matches API metadata after
|
|
112
|
+
download, deletes and retries on mismatch
|
|
113
|
+
- Disk space check before `init` starts (~30 GB required)
|
|
114
|
+
- Corrupt ZIP handling: `zipfile.BadZipFile` caught and logged, file deleted
|
|
115
|
+
and skipped instead of crashing the pipeline
|
|
116
|
+
- Graceful SIGINT/SIGTERM shutdown: finishes current file, commits to DB,
|
|
117
|
+
exits cleanly. Re-run to resume.
|
|
118
|
+
- Non-ZIP files in product listings (`.doc`, `.pdf`) filtered out
|
|
119
|
+
- JSON progress events emitted in `--json` mode for container/CI monitoring
|
|
120
|
+
- ETA display in progress bar based on running average per file
|
|
121
|
+
|
|
122
|
+
### Performance
|
|
123
|
+
- `pipeline.py` — Pipelined download + ingest using producer/consumer threading;
|
|
124
|
+
overlaps file download with ingest of the previous file (~20-25% faster)
|
|
125
|
+
- `--off-peak` flag on `init` and `update` — uses USPTO off-peak rate limits
|
|
126
|
+
(12 req/min vs 4 req/min, ~3x faster downloads between 10pm-5am EST)
|
|
127
|
+
- `--stream-ingest` flag — streams XML directly from ZIP to parser without
|
|
128
|
+
writing to disk (less disk I/O; memory use is unchanged because decompression is streamed)
|
|
129
|
+
- ZIP files deleted after successful ingestion by default to save disk space;
|
|
130
|
+
`--keep-zips` flag on `init` and `update` to retain them
|
|
131
|
+
- WAL checkpoint (`PRAGMA wal_checkpoint(TRUNCATE)`) after init/update completes
|
|
132
|
+
to consolidate the WAL file into the main database
|
|
133
|
+
- Stack-based parent tracking in iterparse prevents a memory leak on large files
|
|
134
|
+
- Batched DELETE for class associations using `WHERE IN` (999-element chunks)
|
|
135
|
+
instead of per-row deletes
|
|
136
|
+
- N+1 class query elimination — batch-fetches class associations in a single
|
|
137
|
+
chunked `WHERE IN` query instead of one query per match
|
|
138
|
+
- Composite index `(is_live, wordmark_normalized)` for faster filtered searches
|
|
139
|
+
- MCP server instructions trimmed from ~460 to ~80 tokens per connection
|
|
140
|
+
- Legal report component searches exclude dead marks and truncate goods/services
|
|
141
|
+
to reduce MCP response token usage
|
|
142
|
+
- SQLite bulk-ingest pragmas (`synchronous=NORMAL`, `cache_size=-64000`)
|
|
143
|
+
applied during ingestion via context manager
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
# Code of Conduct
|
|
2
|
+
|
|
3
|
+
This project follows the [Contributor Covenant v2.1](https://www.contributor-covenant.org/version/2/1/code_of_conduct/).
|
|
4
|
+
|
|
5
|
+
By participating in this project, you agree to abide by its terms.
|
|
6
|
+
|
|
7
|
+
## Reporting
|
|
8
|
+
|
|
9
|
+
If you experience or witness unacceptable behavior, please report it by opening
|
|
10
|
+
a GitHub issue or contacting the maintainer directly.
|
|
11
|
+
|
|
12
|
+
Reports will be reviewed and responded to promptly.
|
|
@@ -0,0 +1,129 @@
|
|
|
1
|
+
# Contributing to chktm
|
|
2
|
+
|
|
3
|
+
Thanks for your interest in contributing. chktm is deliberately small — it does
|
|
4
|
+
one thing (trademark screening) and tries to do it well.
|
|
5
|
+
|
|
6
|
+
## Scope philosophy
|
|
7
|
+
|
|
8
|
+
We are deliberately small. Features outside the v0.1 scope are tracked in
|
|
9
|
+
[BACKLOG.md](BACKLOG.md) and considered case-by-case. PRs that add scope without
|
|
10
|
+
discussion will be asked to open an issue first.
|
|
11
|
+
|
|
12
|
+
## Getting started
|
|
13
|
+
|
|
14
|
+
### Prerequisites
|
|
15
|
+
|
|
16
|
+
- Python 3.11+ (Linux, macOS, or Windows)
|
|
17
|
+
- Git
|
|
18
|
+
- A free [USPTO ODP API key](https://data.uspto.gov/apis/getting-started)
|
|
19
|
+
(only needed for `chktm init` / `chktm update`, not for development)
|
|
20
|
+
|
|
21
|
+
### Setup
|
|
22
|
+
|
|
23
|
+
**Linux / macOS:**
|
|
24
|
+
|
|
25
|
+
```bash
|
|
26
|
+
git clone https://github.com/nickschuetz/chktm.git
|
|
27
|
+
cd chktm
|
|
28
|
+
pip install -e .
|
|
29
|
+
```
|
|
30
|
+
|
|
31
|
+
**Windows (PowerShell):**
|
|
32
|
+
|
|
33
|
+
```powershell
|
|
34
|
+
git clone https://github.com/nickschuetz/chktm.git
|
|
35
|
+
cd chktm
|
|
36
|
+
pip install -e .
|
|
37
|
+
```
|
|
38
|
+
|
|
39
|
+
The install command is identical across platforms. If your system uses `pip3`
|
|
40
|
+
instead of `pip`, substitute accordingly.
|
|
41
|
+
|
|
42
|
+
### Run tests
|
|
43
|
+
|
|
44
|
+
**All platforms:**
|
|
45
|
+
|
|
46
|
+
```bash
|
|
47
|
+
pytest
|
|
48
|
+
```
|
|
49
|
+
|
|
50
|
+
Tests must not hit the network. They use checked-in fixture XML files under
|
|
51
|
+
`tests/fixtures/`. All tests must pass before any PR is merged.
|
|
52
|
+
|
|
53
|
+
### Code style
|
|
54
|
+
|
|
55
|
+
- Formatter: `ruff format`
|
|
56
|
+
- Linter: `ruff check`
|
|
57
|
+
- Type hints on all public functions
|
|
58
|
+
- `from __future__ import annotations` at the top of every module
|
|
59
|
+
- Apache-2.0 SPDX header at the top of every source file:
|
|
60
|
+
`# SPDX-License-Identifier: Apache-2.0`
|
|
61
|
+
|
|
62
|
+
**All platforms:**
|
|
63
|
+
|
|
64
|
+
```bash
|
|
65
|
+
ruff format src/ tests/
|
|
66
|
+
ruff check src/ tests/
|
|
67
|
+
```
|
|
68
|
+
|
|
69
|
+
### Platform notes for contributors
|
|
70
|
+
|
|
71
|
+
- **Paths:** Always use `pathlib.Path`, never hardcoded `/` or `\` separators.
|
|
72
|
+
- **Environment variables:** Document both `export` (Linux/macOS) and
|
|
73
|
+
`$env:` (Windows PowerShell) forms when referencing env vars in docs.
|
|
74
|
+
- **Line endings:** The repo uses LF line endings. Git's `core.autocrlf`
|
|
75
|
+
handles conversion on Windows.
|
|
76
|
+
- **Shell commands:** If a command differs between platforms, show all variants.
|
|
77
|
+
Commands that work identically everywhere need only be shown once.
|
|
78
|
+
|
|
79
|
+
## Commits
|
|
80
|
+
|
|
81
|
+
### DCO sign-off
|
|
82
|
+
|
|
83
|
+
All commits must be signed off under the
|
|
84
|
+
[Developer Certificate of Origin](https://developercertificate.org/):
|
|
85
|
+
|
|
86
|
+
```bash
|
|
87
|
+
git commit -s -m "your commit message"
|
|
88
|
+
```
|
|
89
|
+
|
|
90
|
+
This adds a `Signed-off-by:` line to your commit message, certifying that you
|
|
91
|
+
wrote the code or have the right to submit it under the project's license.
|
|
92
|
+
|
|
93
|
+
### Commit messages
|
|
94
|
+
|
|
95
|
+
- Clarity beats format. Conventional commits are fine but not required.
|
|
96
|
+
- One logical change per commit.
|
|
97
|
+
- Never commit downloaded USPTO data, the SQLite database, or anything under
|
|
98
|
+
`data/`.
|
|
99
|
+
|
|
100
|
+
## Pull requests
|
|
101
|
+
|
|
102
|
+
1. Open an issue first for anything beyond a trivial bug fix.
|
|
103
|
+
2. Keep PRs focused — one logical change per PR.
|
|
104
|
+
3. All tests must pass.
|
|
105
|
+
4. `ruff format` and `ruff check` must pass with no errors.
|
|
106
|
+
5. Update `CHANGELOG.md` under `[Unreleased]` with a brief description.
|
|
107
|
+
|
|
108
|
+
## What not to contribute (yet)
|
|
109
|
+
|
|
110
|
+
These are tracked in BACKLOG.md and not ready for PRs:
|
|
111
|
+
|
|
112
|
+
- Fuzzy/phonetic matching
|
|
113
|
+
- Web UI beyond the current lightweight search form
|
|
114
|
+
- Multi-jurisdiction support
|
|
115
|
+
- PyPI publishing
|
|
116
|
+
- Common-law usage checks
|
|
117
|
+
|
|
118
|
+
If you want to work on any of these, open an issue to discuss approach first.
|
|
119
|
+
|
|
120
|
+
## The disclaimer is load-bearing
|
|
121
|
+
|
|
122
|
+
chktm is a research aid, not legal clearance. That sentence must appear in:
|
|
123
|
+
the README, `--help` output, every generated report, and `src/chktm/disclaimer.py`
|
|
124
|
+
as the single source of truth. Do not paraphrase it into something softer.
|
|
125
|
+
|
|
126
|
+
## Questions?
|
|
127
|
+
|
|
128
|
+
Open an issue. A two-line clarifying question is cheaper than an hour of work
|
|
129
|
+
in the wrong direction.
|