chktm 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48) hide show
  1. chktm-0.1.0/.claude/settings.local.json +45 -0
  2. chktm-0.1.0/.gitignore +210 -0
  3. chktm-0.1.0/AGENTS.md +76 -0
  4. chktm-0.1.0/BACKLOG.md +61 -0
  5. chktm-0.1.0/CHANGELOG.md +143 -0
  6. chktm-0.1.0/CODE_OF_CONDUCT.md +12 -0
  7. chktm-0.1.0/CONTRIBUTING.md +129 -0
  8. chktm-0.1.0/Containerfile +59 -0
  9. chktm-0.1.0/LICENSE +201 -0
  10. chktm-0.1.0/PKG-INFO +325 -0
  11. chktm-0.1.0/README.md +308 -0
  12. chktm-0.1.0/SBOM.md +89 -0
  13. chktm-0.1.0/SECURITY.md +154 -0
  14. chktm-0.1.0/SPEC.md +210 -0
  15. chktm-0.1.0/deploy/openshift/deployment.yaml +75 -0
  16. chktm-0.1.0/deploy/openshift/init-job.yaml +68 -0
  17. chktm-0.1.0/deploy/openshift/namespace.yaml +8 -0
  18. chktm-0.1.0/deploy/openshift/pvc.yaml +17 -0
  19. chktm-0.1.0/deploy/openshift/route.yaml +20 -0
  20. chktm-0.1.0/deploy/openshift/secret.yaml +19 -0
  21. chktm-0.1.0/deploy/openshift/service.yaml +19 -0
  22. chktm-0.1.0/deploy/openshift/update-cronjob.yaml +68 -0
  23. chktm-0.1.0/docs/architecture.md +364 -0
  24. chktm-0.1.0/docs/chktm.1 +196 -0
  25. chktm-0.1.0/docs/deployment.md +306 -0
  26. chktm-0.1.0/docs/recon-phase1.md +303 -0
  27. chktm-0.1.0/docs/testing-mcp.md +254 -0
  28. chktm-0.1.0/docs/usage-guide.md +627 -0
  29. chktm-0.1.0/pyproject.toml +41 -0
  30. chktm-0.1.0/src/chktm/__init__.py +2 -0
  31. chktm-0.1.0/src/chktm/cli.py +784 -0
  32. chktm-0.1.0/src/chktm/config.py +79 -0
  33. chktm-0.1.0/src/chktm/disclaimer.py +10 -0
  34. chktm-0.1.0/src/chktm/fetch.py +329 -0
  35. chktm-0.1.0/src/chktm/ingest.py +224 -0
  36. chktm-0.1.0/src/chktm/mcp_server.py +240 -0
  37. chktm-0.1.0/src/chktm/pipeline.py +190 -0
  38. chktm-0.1.0/src/chktm/report.py +797 -0
  39. chktm-0.1.0/src/chktm/schema.py +120 -0
  40. chktm-0.1.0/src/chktm/search.py +257 -0
  41. chktm-0.1.0/src/chktm/web.py +319 -0
  42. chktm-0.1.0/tests/__init__.py +0 -0
  43. chktm-0.1.0/tests/fixtures/sample_daily.xml +2259 -0
  44. chktm-0.1.0/tests/fixtures/sample_edge_cases.xml +152 -0
  45. chktm-0.1.0/tests/test_ingest.py +134 -0
  46. chktm-0.1.0/tests/test_report.py +128 -0
  47. chktm-0.1.0/tests/test_schema.py +66 -0
  48. chktm-0.1.0/tests/test_search.py +116 -0
@@ -0,0 +1,45 @@
1
+ {
2
+ "permissions": {
3
+ "allow": [
4
+ "WebSearch",
5
+ "WebFetch(domain:data.uspto.gov)",
6
+ "WebFetch(domain:www.uspto.gov)",
7
+ "WebFetch(domain:developer.uspto.gov)",
8
+ "WebFetch(domain:bulkdata.uspto.gov)",
9
+ "Bash(sudo dnf:*)",
10
+ "WebFetch(domain:catalog.data.gov)",
11
+ "Bash(pdftotext \"/home/nickschuetz/.config/claude-code/personal/projects/-home-nickschuetz-code-chktm/f5ff2b24-6468-4643-b31e-1e92f19055ff/tool-results/webfetch-1775849848164-30lf7k.pdf\" -)",
12
+ "Bash(curl -s 'https://data.uspto.gov/api/v1/datasets/products/search?productTitle=Trademark')",
13
+ "Bash(curl -s -H 'Accept: application/json' 'https://data.uspto.gov/ptab-api/search/products?rows=50&start=0&largeTextSearchFlag=N&productTitle=Trademark')",
14
+ "Bash(python3 -m json.tool)",
15
+ "Bash(curl -sv 'https://data.uspto.gov/api/v1/search/products?rows=50&start=0&largeTextSearchFlag=N&productTitle=Trademark')",
16
+ "WebFetch(domain:github.com)",
17
+ "Bash(curl -s -H 'Accept: application/json' -H 'X-API-KEY: dummy' 'https://data.uspto.gov/api/v1/bulk-data/product/TRTDXFAP')",
18
+ "Bash(curl -s -H 'Accept: application/json' 'https://api.uspto.gov/v1/bulk-data/product/TRTDXFAP')",
19
+ "Bash(curl -s -H 'Accept: application/json' 'https://api.uspto.gov/v3/bulk-data/products/TRTDXFAP/files?rows=5&start=0')",
20
+ "Bash(pdftotext \"/home/nickschuetz/.config/claude-code/personal/projects/-home-nickschuetz-code-chktm/f5ff2b24-6468-4643-b31e-1e92f19055ff/tool-results/webfetch-1775850107225-n96rdd.pdf\" -)",
21
+ "Bash(curl -sI 'https://bulkdata.uspto.gov/data/trademark/dailyxml/applications/apc260101.zip')",
22
+ "Bash(curl -s 'https://api.uspto.gov/v3/bulk-data/products?productNameFilter=Trademark' -H 'Accept: application/json')",
23
+ "Bash(curl -sv 'https://bulkdata.uspto.gov/data/trademark/dailyxml/applications/')",
24
+ "Bash(curl -s 'https://api.uspto.gov/api/v1/datasets/products/search?q=Trademark&limit=20&offset=0' -H 'Accept: application/json')",
25
+ "Bash(curl -s -o /tmp/bdss-odp-mapping.pdf 'https://data.uspto.gov/documents/documents/BDSS-to-ODP-API-Mapping.pdf')",
26
+ "Bash(pdftotext /tmp/bdss-odp-mapping.pdf -)",
27
+ "WebFetch(domain:raw.githubusercontent.com)",
28
+ "Bash(curl -s 'https://raw.githubusercontent.com/patent-dev/uspto-odp/main/swagger_fixed.yaml')",
29
+ "Bash(curl -s 'https://raw.githubusercontent.com/patent-dev/uspto-odp/main/client.go')",
30
+ "WebFetch(domain:uspto.report)",
31
+ "Bash(python3 -m pip install -e /home/nickschuetz/code/chktm)",
32
+ "Bash(dnf list:*)",
33
+ "Bash(PYTHONPATH=src python3:*)",
34
+ "Bash(python3 -m ensurepip)",
35
+ "Bash(python3 -m pip install pytest typer rich --quiet)",
36
+ "Bash(python3 -m pip install defusedxml --quiet)",
37
+ "Bash(python3:*)",
38
+ "Bash(echo \"exit: $?\")",
39
+ "Bash(mkdir -p /mnt/d/Storage/claude/chktm)",
40
+ "Bash(mv /home/nickschuetz/code/chktm/data/* /mnt/d/Storage/claude/chktm/)",
41
+ "Bash(rmdir /home/nickschuetz/code/chktm/data)",
42
+ "Bash(PYTHONPATH=src CHKTM_DATA_DIR=/mnt/d/Storage/claude/chktm python3:*)"
43
+ ]
44
+ }
45
+ }
chktm-0.1.0/.gitignore ADDED
@@ -0,0 +1,210 @@
1
+ # chktm data directory (downloaded USPTO bulk data + SQLite DB)
2
+ data/
3
+
4
+ # Byte-compiled / optimized / DLL files
5
+ __pycache__/
6
+ *.py[codz]
7
+ *$py.class
8
+
9
+ # C extensions
10
+ *.so
11
+
12
+ # Distribution / packaging
13
+ .Python
14
+ build/
15
+ develop-eggs/
16
+ dist/
17
+ downloads/
18
+ eggs/
19
+ .eggs/
20
+ lib/
21
+ lib64/
22
+ parts/
23
+ sdist/
24
+ var/
25
+ wheels/
26
+ share/python-wheels/
27
+ *.egg-info/
28
+ .installed.cfg
29
+ *.egg
30
+ MANIFEST
31
+
32
+ # PyInstaller
33
+ # Usually these files are written by a python script from a template
34
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
35
+ *.manifest
36
+ *.spec
37
+
38
+ # Installer logs
39
+ pip-log.txt
40
+ pip-delete-this-directory.txt
41
+
42
+ # Unit test / coverage reports
43
+ htmlcov/
44
+ .tox/
45
+ .nox/
46
+ .coverage
47
+ .coverage.*
48
+ .cache
49
+ nosetests.xml
50
+ coverage.xml
51
+ *.cover
52
+ *.py.cover
53
+ .hypothesis/
54
+ .pytest_cache/
55
+ cover/
56
+
57
+ # Translations
58
+ *.mo
59
+ *.pot
60
+
61
+ # Django stuff:
62
+ *.log
63
+ local_settings.py
64
+ db.sqlite3
65
+ db.sqlite3-journal
66
+
67
+ # Flask stuff:
68
+ instance/
69
+ .webassets-cache
70
+
71
+ # Scrapy stuff:
72
+ .scrapy
73
+
74
+ # Sphinx documentation
75
+ docs/_build/
76
+
77
+ # PyBuilder
78
+ .pybuilder/
79
+ target/
80
+
81
+ # Jupyter Notebook
82
+ .ipynb_checkpoints
83
+
84
+ # IPython
85
+ profile_default/
86
+ ipython_config.py
87
+
88
+ # pyenv
89
+ # For a library or package, you might want to ignore these files since the code is
90
+ # intended to run in multiple environments; otherwise, check them in:
91
+ # .python-version
92
+
93
+ # pipenv
94
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
95
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
96
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
97
+ # install all needed dependencies.
98
+ #Pipfile.lock
99
+
100
+ # UV
101
+ # Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
102
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
103
+ # commonly ignored for libraries.
104
+ #uv.lock
105
+
106
+ # poetry
107
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
108
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
109
+ # commonly ignored for libraries.
110
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
111
+ #poetry.lock
112
+ #poetry.toml
113
+
114
+ # pdm
115
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
116
+ # pdm recommends including project-wide configuration in pdm.toml, but excluding .pdm-python.
117
+ # https://pdm-project.org/en/latest/usage/project/#working-with-version-control
118
+ #pdm.lock
119
+ #pdm.toml
120
+ .pdm-python
121
+ .pdm-build/
122
+
123
+ # pixi
124
+ # Similar to Pipfile.lock, it is generally recommended to include pixi.lock in version control.
125
+ #pixi.lock
126
+ # Pixi creates a virtual environment in the .pixi directory, just like venv module creates one
127
+ # in the .venv directory. It is recommended not to include this directory in version control.
128
+ .pixi
129
+
130
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
131
+ __pypackages__/
132
+
133
+ # Celery stuff
134
+ celerybeat-schedule
135
+ celerybeat.pid
136
+
137
+ # SageMath parsed files
138
+ *.sage.py
139
+
140
+ # Environments
141
+ .env
142
+ .envrc
143
+ .venv
144
+ env/
145
+ venv/
146
+ ENV/
147
+ env.bak/
148
+ venv.bak/
149
+
150
+ # Spyder project settings
151
+ .spyderproject
152
+ .spyproject
153
+
154
+ # Rope project settings
155
+ .ropeproject
156
+
157
+ # mkdocs documentation
158
+ /site
159
+
160
+ # mypy
161
+ .mypy_cache/
162
+ .dmypy.json
163
+ dmypy.json
164
+
165
+ # Pyre type checker
166
+ .pyre/
167
+
168
+ # pytype static type analyzer
169
+ .pytype/
170
+
171
+ # Cython debug symbols
172
+ cython_debug/
173
+
174
+ # PyCharm
175
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
176
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
177
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
178
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
179
+ #.idea/
180
+
181
+ # Abstra
182
+ # Abstra is an AI-powered process automation framework.
183
+ # Ignore directories containing user credentials, local state, and settings.
184
+ # Learn more at https://abstra.io/docs
185
+ .abstra/
186
+
187
+ # Visual Studio Code
188
+ # Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore
189
+ # that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore
190
+ # and can be added to the global gitignore or merged into this file. However, if you prefer,
191
+ # you could uncomment the following to ignore the entire vscode folder
192
+ # .vscode/
193
+
194
+ # Ruff stuff:
195
+ .ruff_cache/
196
+
197
+ # PyPI configuration file
198
+ .pypirc
199
+
200
+ # Cursor
201
+ # Cursor is an AI-powered code editor. `.cursorignore` specifies files/directories to
202
+ # exclude from AI features like autocomplete and code analysis. Recommended for sensitive data
203
+ # refer to https://docs.cursor.com/context/ignore-files
204
+ .cursorignore
205
+ .cursorindexingignore
206
+
207
+ # Marimo
208
+ marimo/_static/
209
+ marimo/_lsp/
210
+ __marimo__/
chktm-0.1.0/AGENTS.md ADDED
@@ -0,0 +1,76 @@
1
+ # AGENTS.md
2
+
3
+ Instructions for AI coding agents working in this repo.
4
+
5
+ ## Source of truth
6
+
7
+ `SPEC.md` is the authoritative scope for v0.1. If this file and SPEC.md disagree
8
+ about *what* to build, SPEC.md wins. This file governs *how* to build it.
9
+
10
+ Anything not in SPEC.md's v0.1 scope goes in `BACKLOG.md`, not in the code.
11
+
12
+ ## Build in phases, stop between them
13
+
14
+ SPEC.md defines five phases. Treat the stops between phases as hard stops:
15
+ finish the phase, report results to the human, and wait for a go-ahead before
16
+ starting the next one. Do not chain phases together unprompted.
17
+
18
+ Phase 1 in particular is a **recon-only** phase. Do not write application code
19
+ during Phase 1. Verify the live USPTO Open Data Portal (data.uspto.gov) details
20
+ firsthand — URL patterns, file formats, schema, update cadence — and produce a
21
+ short written recon report. The rest of the build depends on this being right.
22
+
23
+ ## Time budget
24
+
25
+ Target: 8–12 hours total across all phases.
26
+ Hard stop: 16 hours. If you hit 16 hours and v0.1 is not shipped, stop and
27
+ surface the problem. Do not silently keep going.
28
+
29
+ ## Scope discipline
30
+
31
+ - If a change feels like it's growing the scope, stop and ask.
32
+ - "While I'm here" refactors are not free. Skip them unless they're load-bearing
33
+ for the current phase.
34
+ - New dependencies need a one-line justification in the commit message.
35
+ - DuckDB vs SQLite is an open call per SPEC.md — if you pick DuckDB, document
36
+ why in the commit and in `README.md`.
37
+
38
+ ## Commits
39
+
40
+ - Sign off every commit: `git commit -s` (DCO, no CLA — see SPEC.md).
41
+ - Conventional-ish messages are fine but not required. Clarity beats format.
42
+ - One logical change per commit. Recon notes, schema, ingest, fetch, search,
43
+ report, docs — these should not all land in one commit.
44
+ - Never commit downloaded USPTO data, the SQLite database, or anything under
45
+ `data/`. Add to `.gitignore` early.
46
+
47
+ ## Code conventions
48
+
49
+ - Python 3.11+.
50
+ - `pyproject.toml` with `hatchling` or `setuptools` — your call, document it.
51
+ - Formatter: `ruff format`. Linter: `ruff check`. No black, no flake8, no isort.
52
+ - Type hints on all public functions. `from __future__ import annotations` at
53
+ the top of every module.
54
+ - Apache-2.0 SPDX header at the top of every source file:
55
+ `# SPDX-License-Identifier: Apache-2.0`
56
+ - CLI framework: Typer (per SPEC.md). Don't substitute Click or argparse.
57
+
58
+ ## Testing
59
+
60
+ - `pytest`. Fixtures live under `tests/fixtures/`.
61
+ - Every module in `src/chktm/` should have a corresponding `test_*.py`.
62
+ - Tests must not hit the network. If a test needs USPTO data, it uses a
63
+ checked-in fixture XML file under `tests/fixtures/`.
64
+ - `pytest` with no arguments must pass before any phase is considered done.
65
+
66
+ ## The disclaimer is load-bearing
67
+
68
+ chktm is a research aid, not legal clearance. That sentence (or a close
69
+ variant) must appear in: `README.md`, `--help` output, every generated report,
70
+ and the top of `src/chktm/disclaimer.py` as the single source of truth that
71
+ the other surfaces import from. Do not paraphrase it into something softer.
72
+
73
+ ## When in doubt
74
+
75
+ Stop and ask the human. A two-line clarifying question is cheaper than an
76
+ hour of work in the wrong direction.
chktm-0.1.0/BACKLOG.md ADDED
@@ -0,0 +1,61 @@
1
+ # Backlog
2
+
3
+ Items explicitly deferred from v0.1. These are not bugs — they are conscious
4
+ scope boundaries. PRs for these items should open an issue for discussion first.
5
+
6
+ ## Search improvements
7
+
8
+ - **Fuzzy matching** — Levenshtein distance, n-gram similarity for catching
9
+ near-miss typos (e.g., "thundercorp" vs "thundrcorp")
10
+ - **Phonetic matching** — Soundex, Metaphone, or Double Metaphone for catching
11
+ sound-alike marks (e.g., "Hella" vs "Hela")
12
+ - **Weighting/scoring** — More nuanced risk scoring beyond the current three-tier
13
+ system (exact match weight, prefix match, class distance)
14
+
15
+ ## Data sources
16
+
17
+ - **Multi-jurisdiction** — EU (EUIPO), UK (IPO), WIPO Madrid Protocol, Canada
18
+ (CIPO). Each has its own bulk data format.
19
+ - **Common-law usage checks** — Search Steam, itch.io, GitHub, app stores for
20
+ unregistered marks in the same space
21
+ - **TSDR real-time lookup** — Query `tsdrapi.uspto.gov` for individual case
22
+ details (different API, different key)
23
+ - **Official status codes table** — Download and parse
24
+ `Table1TrademarkStatusCodes_20250813.doc` for the definitive live/dead mapping
25
+ instead of the current range-based heuristic
26
+
27
+ ## Distribution
28
+
29
+ - **PyPI publishing** — `pip install chktm` instead of `pipx install git+...`
30
+ - **Homebrew formula** — For macOS users
31
+ - **Pre-built container images** — Automated CI/CD to build and push to quay.io
32
+ on tag
33
+
34
+ ## Web UI
35
+
36
+ - **Saved searches** — Persist search queries for monitoring over time
37
+ - **Diff reports** — Compare results between runs to surface new conflicts
38
+ - **Authentication** — OAuth proxy or basic auth for the web UI and MCP endpoints
39
+ - **Rate limiting** — Per-client rate limiting on the web API
40
+
41
+ ## Infrastructure
42
+
43
+ - **CI/CD pipeline** — GitHub Actions for tests, linting, container builds
44
+ - **Helm chart** — Parameterized OpenShift/Kubernetes deployment
45
+ - **CycloneDX SBOM export** — Machine-readable SBOM in addition to the current
46
+ human-readable `SBOM.md`
47
+ - **Database backend alternatives** — PostgreSQL or DuckDB for multi-replica
48
+ deployments where SQLite's single-writer limitation is a problem
49
+
50
+ ## Agent integration
51
+
52
+ - **MCP authentication** — Token-based auth for the MCP endpoints
53
+ - **Batch monitoring** — MCP tool to search a large watchlist of terms on a
54
+ schedule and report only new/changed results since last check
55
+ - **Monitoring tool** — MCP tool to set up alerts when new marks are filed that
56
+ match a watched term
57
+
58
+ ## Documentation
59
+
60
+ No outstanding documentation items. The man page (`docs/chktm.1`) and the API
61
+ reference at `/docs` (Swagger UI) and `/redoc` (ReDoc) are included in v0.1.
@@ -0,0 +1,143 @@
1
+ # Changelog
2
+
3
+ All notable changes to this project will be documented in this file.
4
+
5
+ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
6
+ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
7
+
8
+ ## [Unreleased]
9
+
10
+ ### Added
11
+ - Project skeleton: `pyproject.toml` (hatchling), `src/chktm/` package layout
12
+ - `schema.py` — SQLite schema (marks, mark_classes, meta tables), text
13
+ normalization, date parsing, status-code live/dead heuristic
14
+ - `ingest.py` — Streaming XML parser using `defusedxml.ElementTree.iterparse`
15
+ with batched upserts and memory-bounded processing
16
+ - `cli.py` — Typer CLI with commands: `init`, `update`, `search`, `status`,
17
+ `version`. All commands support `--json` for structured machine-readable output
18
+ - `disclaimer.py` — Single source of truth for legal disclaimer text
19
+ - `bulk_ingest_mode()` context manager for tuned SQLite writes
20
+ (`synchronous=NORMAL`, 64 MB cache)
21
+ - `CHKTM_DATA_DIR` environment variable support for overriding default data
22
+ directory
23
+ - Stable exit codes: 0 (success), 1 (error), 2 (no database)
24
+ - `fetch.py` — Download module for USPTO ODP API with rate limiting
25
+ (4 ZIP/min), progress callbacks, resumability via file-size checks and
26
+ meta table tracking
27
+ - `search.py` — Query engine with normalized substring matching, class
28
+ filtering, and risk-tier classification (HIGH/MEDIUM/LOW)
29
+ - `report.py` — Markdown and JSON report rendering grouped by risk tier,
30
+ with disclaimer and TSDR links
31
+ - `chktm init` — Full pipeline: download annual backfile + daily files,
32
+ extract, ingest into SQLite, with Rich progress bars
33
+ - `chktm update` — Incremental daily file download and ingest since last
34
+ update date
35
+ - `chktm search` — Fully implemented with `--classes`, `--include-dead`,
36
+ `--out`, and `--json` flags
37
+ - Test suite: 54 tests covering schema utilities, ingest, search, and
38
+ report rendering
39
+ - Test fixtures: `sample_edge_cases.xml` (synthetic), `sample_daily.xml`
40
+ (extracted from USPTO daily file)
41
+ - `web.py` — FastAPI web application with lightweight search UI and REST API
42
+ (`GET /`, `GET /api/status`, `GET /api/search`)
43
+ - `mcp_server.py` — MCP (Model Context Protocol) server with `search_trademarks`
44
+ and `corpus_status` tools. Supports stdio, SSE, and Streamable HTTP transports
45
+ - `chktm serve` — CLI command to start the combined web UI + MCP server
46
+ - `Containerfile` — Multi-stage build for quay.io, runs as non-root (UID 1001),
47
+ PVC mount at `/data` for the SQLite database
48
+ - OpenShift deployment manifests in `deploy/openshift/`:
49
+ Namespace, PVC, Secret, init Job, Deployment, Service, Route, update CronJob
50
+ - `docs/deployment.md` — Full deployment guide for OpenShift with quay.io,
51
+ including MCP client configuration
52
+ - `SBOM.md` — Software Bill of Materials listing all direct and transitive
53
+ dependencies with licenses
54
+ - `docs/architecture.md` — Data flow, module responsibilities, database schema,
55
+ security model, AI agent interface contract
56
+ - `docs/recon-phase1.md` — Phase 1 recon report documenting USPTO ODP API
57
+ findings, XML schema, status codes, rate limits, and gotchas
58
+ - `README.md` — Project overview, quickstart, CLI reference, known limitations
59
+ - `CONTRIBUTING.md` — DCO sign-off, scope philosophy, setup, code style
60
+ - `BACKLOG.md` — Deferred items: fuzzy matching, multi-jurisdiction, PyPI, etc.
61
+ - `CODE_OF_CONDUCT.md` — Contributor Covenant v2.1
62
+ - `SECURITY.md` — Threat model, OWASP Top 10 compliance mapping, vuln reporting
63
+ - `docs/usage-guide.md` — Search best practices, class selection, risk tier
64
+ interpretation, agent efficiency tips for minimizing tokens/round-trips
65
+ - `docs/chktm.1` — Man page covering all commands, options, exit codes,
66
+ environment variables, config files, endpoints, and examples
67
+ - Interactive API reference at `/docs` (Swagger UI) and `/redoc` (ReDoc)
68
+ auto-generated by FastAPI from endpoint definitions
69
+ - `docs/testing-mcp.md` — Step-by-step MCP Inspector testing guide covering
70
+ Streamable HTTP, SSE, stdio, and CLI modes on all platforms
71
+ - Multiplatform documentation: all install, env var, and MCP config examples
72
+ include Linux, macOS, and Windows variants
73
+ - Version centralized in `src/chktm/__init__.py` — single source of truth for
74
+ CLI, web app, API responses, MCP protocol handshake, and pyproject.toml
75
+ - `--report legal` flag on `chktm search` — generates attorney-ready report
76
+ with executive summary, component word analysis (auto-splits compound terms),
77
+ risk assessment grouped by owner, limitations, and recommended next steps
78
+ - `generate_legal_report` MCP tool — same legal report available to AI agents
79
+ - PDF output for legal reports — `--out report.pdf` auto-detected from file
80
+ extension. Professional formatting with tables, color-coded risk tiers,
81
+ section headings, and print-ready layout. Uses `fpdf2` (pure Python, LGPL-3.0)
82
+ - `config.py` — Persistent config file (`~/.config/chktm/config.toml` on
83
+ Linux/macOS, `%APPDATA%\chktm\config.toml` on Windows). Saves data directory
84
+ during init so subsequent commands find the database automatically.
85
+ Resolution order: `--data-dir` flag > `CHKTM_DATA_DIR` env > config > `./data`
86
+
87
+ ### Security
88
+ - XML parsing hardened with `defusedxml` to block entity expansion, XXE, and
89
+ DTD retrieval attacks
90
+ - All SQL queries use parameterized statements (`?` placeholders)
91
+ - SQL LIKE wildcards (`%`, `_`) escaped in user search terms to prevent
92
+ unintended pattern matching (OWASP A03:2021)
93
+ - Security headers on all web responses: CSP, X-Content-Type-Options,
94
+ X-Frame-Options, Referrer-Policy, Permissions-Policy (OWASP A05:2021)
95
+ - Input validation: query length (500 chars), term count (20), class count (45),
96
+ per-term length (200 chars), result cap (1,000 per term) (OWASP A04:2021)
97
+ - API key read from environment variable, never logged or committed
98
+ - Filename sanitization on downloads to prevent path traversal (OWASP A04:2021)
99
+ - International class range validation (1-45) on web API (OWASP A04:2021)
100
+ - Generic error messages to clients — internal paths no longer exposed (OWASP A01:2021)
101
+ - `Strict-Transport-Security` (HSTS) header added (OWASP A05:2021)
102
+ - `Cache-Control: no-store` header to prevent caching of search results
103
+ - Container hardened for OpenShift `restricted` SCC: arbitrary UID with GID 0,
104
+ `readOnlyRootFilesystem`, `drop: ALL` capabilities, `runAsNonRoot`,
105
+ `seccompProfile: RuntimeDefault`, `automountServiceAccountToken: false`
106
+ - All OpenShift manifests include pod and container security contexts
107
+
108
+ ### Resilience
109
+ - Per-file retry with exponential backoff (2s, 4s, 8s) on network errors,
110
+ HTTP 429 (with Retry-After), and HTTP 5xx server errors
111
+ - Download integrity check: verifies file size matches API metadata after
112
+ download, deletes and retries on mismatch
113
+ - Disk space check before `init` starts (~30 GB required)
114
+ - Corrupt ZIP handling: `zipfile.BadZipFile` caught and logged, file deleted
115
+ and skipped instead of crashing the pipeline
116
+ - Graceful SIGINT/SIGTERM shutdown: finishes current file, commits to DB,
117
+ exits cleanly. Re-run to resume.
118
+ - Non-ZIP files in product listings (`.doc`, `.pdf`) filtered out
119
+ - JSON progress events emitted in `--json` mode for container/CI monitoring
120
+ - ETA display in progress bar based on running average per file
121
+
122
+ ### Performance
123
+ - `pipeline.py` — Pipelined download + ingest using producer/consumer threading;
124
+ overlaps file download with ingest of the previous file (~20-25% faster)
125
+ - `--off-peak` flag on `init` and `update` — uses USPTO off-peak rate limits
126
+ (12 req/min vs 4 req/min, ~3x faster downloads between 10pm-5am EST)
127
+ - `--stream-ingest` flag — streams XML directly from ZIP to parser without
128
+ writing to disk (less disk I/O; memory use is unchanged because decompression is streamed)
129
+ - ZIP files deleted after successful ingestion by default to save disk space;
130
+ `--keep-zips` flag on `init` and `update` to retain them
131
+ - WAL checkpoint (`PRAGMA wal_checkpoint(TRUNCATE)`) after init/update completes
132
+ to consolidate the WAL file into the main database
133
+ - Stack-based parent tracking in iterparse prevents memory leak on large files
134
+ - Batched DELETE for class associations using `WHERE IN` (999-element chunks)
135
+ instead of per-row deletes
136
+ - N+1 class query elimination — batch-fetches class associations in a single
137
+ chunked `WHERE IN` query instead of one query per match
138
+ - Composite index `(is_live, wordmark_normalized)` for faster filtered searches
139
+ - MCP server instructions trimmed from ~460 to ~80 tokens per connection
140
+ - Legal report component searches exclude dead marks and truncate goods/services
141
+ to reduce MCP response token usage
142
+ - SQLite bulk-ingest pragmas (`synchronous=NORMAL`, `cache_size=-64000`)
143
+ applied during ingestion via context manager
@@ -0,0 +1,12 @@
1
+ # Code of Conduct
2
+
3
+ This project follows the [Contributor Covenant v2.1](https://www.contributor-covenant.org/version/2/1/code_of_conduct/).
4
+
5
+ By participating in this project, you agree to abide by its terms.
6
+
7
+ ## Reporting
8
+
9
+ If you experience or witness unacceptable behavior, please report it by opening
10
+ a GitHub issue or contacting the maintainer directly.
11
+
12
+ Reports will be reviewed and responded to promptly.
@@ -0,0 +1,129 @@
1
+ # Contributing to chktm
2
+
3
+ Thanks for your interest in contributing. chktm is deliberately small — it does
4
+ one thing (trademark screening) and tries to do it well.
5
+
6
+ ## Scope philosophy
7
+
8
+ We are deliberately small. Features outside the v0.1 scope are tracked in
9
+ [BACKLOG.md](BACKLOG.md) and considered case-by-case. PRs that add scope without
10
+ discussion will be asked to open an issue first.
11
+
12
+ ## Getting started
13
+
14
+ ### Prerequisites
15
+
16
+ - Python 3.11+ (Linux, macOS, or Windows)
17
+ - Git
18
+ - A free [USPTO ODP API key](https://data.uspto.gov/apis/getting-started)
19
+ (only needed for `chktm init` / `chktm update`, not for development)
20
+
21
+ ### Setup
22
+
23
+ **Linux / macOS:**
24
+
25
+ ```bash
26
+ git clone https://github.com/nickschuetz/chktm.git
27
+ cd chktm
28
+ pip install -e .
29
+ ```
30
+
31
+ **Windows (PowerShell):**
32
+
33
+ ```powershell
34
+ git clone https://github.com/nickschuetz/chktm.git
35
+ cd chktm
36
+ pip install -e .
37
+ ```
38
+
39
+ The install command is identical across platforms. If your system uses `pip3`
40
+ instead of `pip`, substitute accordingly.
41
+
42
+ ### Run tests
43
+
44
+ **All platforms:**
45
+
46
+ ```bash
47
+ pytest
48
+ ```
49
+
50
+ Tests must not hit the network. They use checked-in fixture XML files under
51
+ `tests/fixtures/`. All tests must pass before any PR is merged.
52
+
53
+ ### Code style
54
+
55
+ - Formatter: `ruff format`
56
+ - Linter: `ruff check`
57
+ - Type hints on all public functions
58
+ - `from __future__ import annotations` at the top of every module
59
+ - Apache-2.0 SPDX header at the top of every source file:
60
+ `# SPDX-License-Identifier: Apache-2.0`
61
+
62
+ **All platforms:**
63
+
64
+ ```bash
65
+ ruff format src/ tests/
66
+ ruff check src/ tests/
67
+ ```
68
+
69
+ ### Platform notes for contributors
70
+
71
+ - **Paths:** Always use `pathlib.Path`, never hardcoded `/` or `\` separators.
72
+ - **Environment variables:** Document both `export` (Linux/macOS) and
73
+ `$env:` (Windows PowerShell) forms when referencing env vars in docs.
74
+ - **Line endings:** The repo uses LF line endings. Git's `core.autocrlf`
75
+ handles conversion on Windows.
76
+ - **Shell commands:** If a command differs between platforms, show all variants.
77
+ Commands that work identically everywhere need only be shown once.
78
+
79
+ ## Commits
80
+
81
+ ### DCO sign-off
82
+
83
+ All commits must be signed off under the
84
+ [Developer Certificate of Origin](https://developercertificate.org/):
85
+
86
+ ```bash
87
+ git commit -s -m "your commit message"
88
+ ```
89
+
90
+ This adds a `Signed-off-by:` line to your commit message, certifying that you
91
+ wrote the code or have the right to submit it under the project's license.
92
+
93
+ ### Commit messages
94
+
95
+ - Clarity beats format. Conventional commits are fine but not required.
96
+ - One logical change per commit.
97
+ - Never commit downloaded USPTO data, the SQLite database, or anything under
98
+ `data/`.
99
+
100
+ ## Pull requests
101
+
102
+ 1. Open an issue first for anything beyond a trivial bug fix.
103
+ 2. Keep PRs focused — one logical change per PR.
104
+ 3. All tests must pass.
105
+ 4. `ruff format` and `ruff check` must pass with no errors.
106
+ 5. Update `CHANGELOG.md` under `[Unreleased]` with a brief description.
107
+
108
+ ## What not to contribute (yet)
109
+
110
+ These are tracked in BACKLOG.md and are not yet ready for PRs:
111
+
112
+ - Fuzzy/phonetic matching
113
+ - Web UI beyond the current lightweight search form
114
+ - Multi-jurisdiction support
115
+ - PyPI publishing
116
+ - Common-law usage checks
117
+
118
+ If you want to work on any of these, open an issue to discuss approach first.
119
+
120
+ ## The disclaimer is load-bearing
121
+
122
+ chktm is a research aid, not legal clearance. That sentence must appear in:
123
+ the README, `--help` output, every generated report, and `src/chktm/disclaimer.py`
124
+ as the single source of truth. Do not paraphrase it into something softer.
125
+
126
+ ## Questions?
127
+
128
+ Open an issue. A two-line clarifying question is cheaper than an hour of work
129
+ in the wrong direction.