colorsense 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54) hide show
  1. colorsense-0.1.0/.gitignore +223 -0
  2. colorsense-0.1.0/LICENSE +21 -0
  3. colorsense-0.1.0/PKG-INFO +239 -0
  4. colorsense-0.1.0/README.md +209 -0
  5. colorsense-0.1.0/pyproject.toml +76 -0
  6. colorsense-0.1.0/src/colorsense/__init__.py +81 -0
  7. colorsense-0.1.0/src/colorsense/classify/__init__.py +0 -0
  8. colorsense-0.1.0/src/colorsense/classify/components.py +299 -0
  9. colorsense-0.1.0/src/colorsense/classify/tokens.py +182 -0
  10. colorsense-0.1.0/src/colorsense/color/__init__.py +0 -0
  11. colorsense-0.1.0/src/colorsense/color/primitives.py +177 -0
  12. colorsense-0.1.0/src/colorsense/config.py +559 -0
  13. colorsense-0.1.0/src/colorsense/data/palette_config.yaml +351 -0
  14. colorsense-0.1.0/src/colorsense/harvest/__init__.py +107 -0
  15. colorsense-0.1.0/src/colorsense/harvest/dom.py +186 -0
  16. colorsense-0.1.0/src/colorsense/harvest/render.py +242 -0
  17. colorsense-0.1.0/src/colorsense/harvest/screenshot.py +303 -0
  18. colorsense-0.1.0/src/colorsense/harvest/states.py +165 -0
  19. colorsense-0.1.0/src/colorsense/harvest/tokens.py +130 -0
  20. colorsense-0.1.0/src/colorsense/models.py +353 -0
  21. colorsense-0.1.0/src/colorsense/net/__init__.py +0 -0
  22. colorsense-0.1.0/src/colorsense/net/politeness.py +322 -0
  23. colorsense-0.1.0/src/colorsense/palette/__init__.py +0 -0
  24. colorsense-0.1.0/src/colorsense/palette/inventory.py +185 -0
  25. colorsense-0.1.0/src/colorsense/palette/reconcile.py +313 -0
  26. colorsense-0.1.0/src/colorsense/palette/roles.py +346 -0
  27. colorsense-0.1.0/src/colorsense/pipeline.py +258 -0
  28. colorsense-0.1.0/src/colorsense/py.typed +0 -0
  29. colorsense-0.1.0/tests/conftest.py +20 -0
  30. colorsense-0.1.0/tests/fixtures/.gitkeep +0 -0
  31. colorsense-0.1.0/tests/fixtures/cards_site.html +59 -0
  32. colorsense-0.1.0/tests/fixtures/consent.html +27 -0
  33. colorsense-0.1.0/tests/fixtures/ds_site.html +106 -0
  34. colorsense-0.1.0/tests/fixtures/hover.html +33 -0
  35. colorsense-0.1.0/tests/fixtures/legacy_site.html +61 -0
  36. colorsense-0.1.0/tests/fixtures/tokens.html +58 -0
  37. colorsense-0.1.0/tests/golden/cards_site.json +28 -0
  38. colorsense-0.1.0/tests/golden/ds_site.json +60 -0
  39. colorsense-0.1.0/tests/golden/legacy_site.json +28 -0
  40. colorsense-0.1.0/tests/test_classify_components.py +169 -0
  41. colorsense-0.1.0/tests/test_classify_tokens.py +190 -0
  42. colorsense-0.1.0/tests/test_color_primitives.py +162 -0
  43. colorsense-0.1.0/tests/test_config.py +132 -0
  44. colorsense-0.1.0/tests/test_harvest.py +168 -0
  45. colorsense-0.1.0/tests/test_harvest_screenshot.py +224 -0
  46. colorsense-0.1.0/tests/test_harvest_states.py +200 -0
  47. colorsense-0.1.0/tests/test_integration_sites.py +260 -0
  48. colorsense-0.1.0/tests/test_models.py +194 -0
  49. colorsense-0.1.0/tests/test_palette_inventory.py +210 -0
  50. colorsense-0.1.0/tests/test_palette_reconcile.py +200 -0
  51. colorsense-0.1.0/tests/test_palette_roles.py +154 -0
  52. colorsense-0.1.0/tests/test_pipeline.py +416 -0
  53. colorsense-0.1.0/tests/test_politeness_cache.py +330 -0
  54. colorsense-0.1.0/tests/test_render_errors.py +184 -0
@@ -0,0 +1,223 @@
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[codz]
4
+ *$py.class
5
+
6
+ # C extensions
7
+ *.so
8
+
9
+ # Distribution / packaging
10
+ .Python
11
+ build/
12
+ develop-eggs/
13
+ dist/
14
+ downloads/
15
+ eggs/
16
+ .eggs/
17
+ lib/
18
+ lib64/
19
+ parts/
20
+ sdist/
21
+ var/
22
+ wheels/
23
+ share/python-wheels/
24
+ *.egg-info/
25
+ .installed.cfg
26
+ *.egg
27
+ MANIFEST
28
+
29
+ # PyInstaller
30
+ # Usually these files are written by a python script from a template
31
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
32
+ *.manifest
33
+ *.spec
34
+
35
+ # Installer logs
36
+ pip-log.txt
37
+ pip-delete-this-directory.txt
38
+
39
+ # Unit test / coverage reports
40
+ htmlcov/
41
+ .tox/
42
+ .nox/
43
+ .coverage
44
+ .coverage.*
45
+ .cache
46
+ nosetests.xml
47
+ coverage.xml
48
+ *.cover
49
+ *.py.cover
50
+ .hypothesis/
51
+ .pytest_cache/
52
+ cover/
53
+
54
+ # Translations
55
+ *.mo
56
+ *.pot
57
+
58
+ # Django stuff:
59
+ *.log
60
+ local_settings.py
61
+ db.sqlite3
62
+ db.sqlite3-journal
63
+
64
+ # Flask stuff:
65
+ instance/
66
+ .webassets-cache
67
+
68
+ # Scrapy stuff:
69
+ .scrapy
70
+
71
+ # Sphinx documentation
72
+ docs/_build/
73
+
74
+ # PyBuilder
75
+ .pybuilder/
76
+ target/
77
+
78
+ # Jupyter Notebook
79
+ .ipynb_checkpoints
80
+
81
+ # IPython
82
+ profile_default/
83
+ ipython_config.py
84
+
85
+ # pyenv
86
+ # For a library or package, you might want to ignore these files since the code is
87
+ # intended to run in multiple environments; otherwise, check them in:
88
+ # .python-version
89
+
90
+ # pipenv
91
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
93
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
94
+ # install all needed dependencies.
95
+ # Pipfile.lock
96
+
97
+ # UV
98
+ # Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
99
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
100
+ # commonly ignored for libraries.
101
+ # uv.lock
102
+
103
+ # poetry
104
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
105
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
106
+ # commonly ignored for libraries.
107
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
108
+ # poetry.lock
109
+ # poetry.toml
110
+
111
+ # pdm
112
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
113
+ # pdm recommends including project-wide configuration in pdm.toml, but excluding .pdm-python.
114
+ # https://pdm-project.org/en/latest/usage/project/#working-with-version-control
115
+ # pdm.lock
116
+ # pdm.toml
117
+ .pdm-python
118
+ .pdm-build/
119
+
120
+ # pixi
121
+ # Similar to Pipfile.lock, it is generally recommended to include pixi.lock in version control.
122
+ # pixi.lock
123
+ # Pixi creates a virtual environment in the .pixi directory, just like venv module creates one
124
+ # in the .venv directory. It is recommended not to include this directory in version control.
125
+ .pixi
126
+
127
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
128
+ __pypackages__/
129
+
130
+ # Celery stuff
131
+ celerybeat-schedule
132
+ celerybeat.pid
133
+
134
+ # Redis
135
+ *.rdb
136
+ *.aof
137
+ *.pid
138
+
139
+ # RabbitMQ
140
+ mnesia/
141
+ rabbitmq/
142
+ rabbitmq-data/
143
+
144
+ # ActiveMQ
145
+ activemq-data/
146
+
147
+ # SageMath parsed files
148
+ *.sage.py
149
+
150
+ # Environments
151
+ .env
152
+ .envrc
153
+ .venv
154
+ env/
155
+ venv/
156
+ ENV/
157
+ env.bak/
158
+ venv.bak/
159
+
160
+ # Spyder project settings
161
+ .spyderproject
162
+ .spyproject
163
+
164
+ # Rope project settings
165
+ .ropeproject
166
+
167
+ # mkdocs documentation
168
+ /site
169
+
170
+ # mypy
171
+ .mypy_cache/
172
+ .dmypy.json
173
+ dmypy.json
174
+
175
+ # Pyre type checker
176
+ .pyre/
177
+
178
+ # pytype static type analyzer
179
+ .pytype/
180
+
181
+ # Cython debug symbols
182
+ cython_debug/
183
+
184
+ # PyCharm
185
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
186
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
187
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
188
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
189
+ # .idea/
190
+
191
+ # Abstra
192
+ # Abstra is an AI-powered process automation framework.
193
+ # Ignore directories containing user credentials, local state, and settings.
194
+ # Learn more at https://abstra.io/docs
195
+ .abstra/
196
+
197
+ # Visual Studio Code
198
+ # Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore
199
+ # that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore
200
+ # and can be added to the global gitignore or merged into this file. However, if you prefer,
201
+ # you could uncomment the following to ignore the entire vscode folder
202
+ # .vscode/
203
+ # Temporary file for partial code execution
204
+ tempCodeRunnerFile.py
205
+
206
+ # Ruff stuff:
207
+ .ruff_cache/
208
+
209
+ # PyPI configuration file
210
+ .pypirc
211
+
212
+ # Marimo
213
+ marimo/_static/
214
+ marimo/_lsp/
215
+ __marimo__/
216
+
217
+ # Streamlit
218
+ .streamlit/secrets.toml
219
+
220
+ .claude/
221
+
222
+ # Local-only orchestration notes (not part of the published project)
223
+ ORCHESTRATOR_INSTRUCTIONS.md
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Cass
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,239 @@
1
+ Metadata-Version: 2.4
2
+ Name: colorsense
3
+ Version: 0.1.0
4
+ Summary: Extract the rendered color palette from any website as a typed result.
5
+ Project-URL: Homepage, https://github.com/cassidyhhaas/colorsense
6
+ Project-URL: Repository, https://github.com/cassidyhhaas/colorsense
7
+ Project-URL: Issues, https://github.com/cassidyhhaas/colorsense/issues
8
+ Author: Cass
9
+ License-Expression: MIT
10
+ License-File: LICENSE
11
+ Keywords: color,css,design-tokens,palette,playwright,web
12
+ Classifier: Development Status :: 4 - Beta
13
+ Classifier: Intended Audience :: Developers
14
+ Classifier: Operating System :: OS Independent
15
+ Classifier: Programming Language :: Python :: 3
16
+ Classifier: Programming Language :: Python :: 3 :: Only
17
+ Classifier: Programming Language :: Python :: 3.12
18
+ Classifier: Topic :: Multimedia :: Graphics
19
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
20
+ Classifier: Typing :: Typed
21
+ Requires-Python: >=3.12
22
+ Requires-Dist: coloraide>=4
23
+ Requires-Dist: httpx>=0.27
24
+ Requires-Dist: numpy>=1.26
25
+ Requires-Dist: pillow>=10
26
+ Requires-Dist: playwright>=1.40
27
+ Requires-Dist: pydantic>=2
28
+ Requires-Dist: pyyaml>=6
29
+ Description-Content-Type: text/markdown
30
+
31
+ # colorsense
32
+
33
+ Extract the rendered color palette from any website and return a structured, typed result
34
+ intended for downstream consumers — including AI models — that need to understand a site's
35
+ color identity (for example, to derive their own theme-matched colors).
36
+
37
+ colorsense renders a page under light (and, on request, dark) color schemes, harvests its
38
+ design tokens and computed element colors, classifies them into a 60/30/10 palette, and
39
+ reconciles what the site *declares* (CSS custom properties) against what it actually *uses*.
40
+ It returns the palette roles and scoring; producing concrete color choices for a given
41
+ widget is left to the consumer.
42
+
43
+ ## Install
44
+
45
+ ```bash
46
+ pip install colorsense
47
+ playwright install chromium
48
+ ```
49
+
50
+ Rendering uses a headless Chromium via Playwright. The browser binary is **not** a Python
51
+ package, so it cannot be pulled in as a pip dependency — run `playwright install chromium`
52
+ once after installing to download it (and `playwright install-deps chromium` on Linux to
53
+ pull the OS libraries Chromium needs).
54
+
55
+ For development from a checkout, use [uv](https://docs.astral.sh/uv/):
56
+
57
+ ```bash
58
+ uv sync
59
+ uv run playwright install chromium
60
+ ```
61
+
62
+ ## Quickstart
63
+
64
+ `analyze` is **async-native** (it uses Playwright's async API and renders the
65
+ themes concurrently), so await it from an event loop. The result's `themes` field maps each color
66
+ scheme to its **60/30/10 palette** — five roles, each with ranked, scored candidate colors:
67
+
68
+ ```python
69
+ import asyncio
70
+ from colorsense import analyze, PaletteRole
71
+
72
+ result = asyncio.run(analyze("https://example.com"))
73
+
74
+ for theme, palette in result.themes.items():
75
+     # mapping always contains every role; () when none was detected
76
+     candidates = palette.roles.mapping[PaletteRole.primary]
77
+     if candidates:
78
+         primary = candidates[0]  # top candidate for the role
79
+         print(theme, primary.color.hex, primary.probability)
80
+ ```
81
+
82
+ Each role — `primary`, `secondary`, `accent`, `neutral_light`, `neutral_dark` — maps to a
83
+ probability-ranked tuple of candidates. Take `[0]` for the best pick.
84
+
85
+ Inside an async application (e.g. a FastAPI `async def` endpoint) just
86
+ `result = await analyze(url)` directly — no threadpool hop required.
87
+
88
+ ## The result
89
+
90
+ `analyze` returns a fully typed `AnalysisResult` (a Pydantic model —
91
+ `result.model_dump_json()` round-trips). The fields most consumers use:
92
+
93
+ **`themes`** — the payload: each `Theme` mapped to its reconciled palette `roles`. Index
94
+ `palette.roles.mapping[role]` to get a tuple of candidates, where each candidate carries:
95
+
96
+ - `color` — a `Color`: an sRGB `hex` string plus cached **OKLCH** coordinates (`lightness`,
97
+ `chroma`, `hue`) of the composited color, and the source `alpha`. `hex` is what you paint
98
+ with; the OKLCH coordinates make it easy to derive your own theme-matched colors — sort by
99
+ perceptual lightness, build accessible tints/shades, or compute contrast — without
100
+ re-parsing the hex.
101
+ - `probability` — confidence this color fills the role (candidates within a role rank by it).
102
+ - `area` — the fraction of page area the color covers, i.e. its 60/30/10 dominance.
103
+
104
+ Sites that ignore `prefers-color-scheme` (near-identical light/dark renders) collapse to a
105
+ single reported theme.
106
+
107
+ **`fit_score`** — how well the measured palette matches the canonical 60/30/10 split, in
108
+ `[0, 1]`. A quick quality signal for the analysis as a whole.
109
+
110
+ **`status_colors`** — success/error/warning colors detected and deliberately **kept out** of
111
+ the palette, so a red error banner doesn't masquerade as a brand accent.
112
+
113
+ **`metadata`** — a typed `RunMetadata`: which themes were requested versus actually analyzed,
114
+ whether the run collapsed to a single theme, and the fetch policy in effect. Useful for
115
+ logging and for detecting the single-theme collapse.
116
+
117
+ ## Options
118
+
119
+ ```python
120
+ import asyncio
121
+ from colorsense import analyze, LIGHT_AND_DARK, PolitenessPolicy, Viewport
122
+
123
+ result = asyncio.run(
124
+ analyze(
125
+ "https://example.com",
126
+ viewport=Viewport(width=1440, height=900, device_scale_factor=2.0),
127
+ themes=LIGHT_AND_DARK, # opt in to dark mode; default is light only
128
+ politeness=PolitenessPolicy(min_interval=2.0), # see "Fetching responsibly" below
129
+ config_path="my_palette_config.yaml", # advanced; see "Custom tuning" below
130
+ )
131
+ )
132
+ ```
133
+
134
+ By default `analyze` renders **light mode only** — most sites have no dark mode, and a
135
+ second theme roughly doubles the render cost. Pass `themes=LIGHT_AND_DARK` (equivalently
136
+ `themes=(Theme.light, Theme.dark)`) to also analyze dark mode; near-identical light/dark
137
+ renders are collapsed back to a single reported theme. A custom `viewport` captures a
138
+ different layout (e.g. mobile), which can yield a different palette.
139
+
140
+ ## Fetching responsibly: politeness, authorization & security
141
+
142
+ colorsense fetches and renders a third-party page. **Authorization is the consumer's
143
+ responsibility** — the library provides *mechanism, not policy*. `PolitenessPolicy`
144
+ (in [`net/politeness.py`](src/colorsense/net/politeness.py)) gives you the controls:
145
+
146
+ - a configurable, identifiable **User-Agent**;
147
+ - a **`robots.txt` gate**, on by default (`respect_robots=True`) — a disallow raises
148
+ `RobotsDisallowedError`;
149
+ - a per-host **rate limiter** (`min_interval` seconds between same-host fetches);
150
+ - a simple URL→render **cache**.
151
+
152
+ Choose your posture by where colorsense runs:
153
+
154
+ - **Server-side / batch** (you analyze sites you operate or are authorized to crawl): keep
155
+ `respect_robots=True`, set a conservative `min_interval`, and use an identifiable
156
+ User-Agent so site operators can contact you.
157
+
158
+ ```python
159
+ policy = PolitenessPolicy(
160
+ user_agent="MyApp/1.0 (+https://myapp.example/bot)",
161
+ min_interval=2.0,
162
+ )
163
+ ```
164
+
165
+ - **Embedded / on-demand** (a user pastes a URL into your product to theme a widget): you
166
+ may legitimately analyze a page the user is entitled to view. You still own the decision
167
+ to fetch — gate it on your own authorization, terms of service, and rate limits *before*
168
+ calling `analyze`. Disabling `respect_robots` is an explicit, accountable choice, not a
169
+ default.
170
+
171
+ colorsense never decides whether a fetch is permitted; it only makes it easy to fetch
172
+ considerately once you have decided.
173
+
174
+ **Security (SSRF + local-file reads).** `analyze` fetches and renders whatever URL it is
175
+ given, so passing **untrusted** URLs exposes a server-side request forgery and local-file-read
176
+ surface. `file://` URLs read arbitrary local files (intentional, for the test fixtures), and
177
+ `http(s)://` URLs can reach internal hosts and cloud metadata endpoints (e.g.
178
+ `169.254.169.254`, `localhost`). This is by design — the politeness controls above gate
179
+ *network* schemes for robots/rate-limiting, but nothing validates the destination host. If
180
+ you accept user-supplied URLs, validate the scheme and host **before** calling `analyze`:
181
+ allowlist public hosts, and reject `file://` and private / link-local IP ranges. As above,
182
+ this is the consumer's responsibility — the library provides mechanism, not policy.
183
+
184
+ ## Advanced
185
+
186
+ ### Design-token auditing
187
+
188
+ Beyond the palette, `analyze` reports what the site's CSS **declares** versus what it
189
+ actually **renders** — useful for auditing a design system you own:
190
+
191
+ - **`tokens`** — the declared design tokens (CSS custom properties) with their inferred
192
+ semantic roles (e.g. `--accent-500` read as `brand_accent`), for the primary theme.
193
+ - **`divergence`** — discrepancies between intent and usage: brand colors **declared but
194
+ unused** in the render, and prominent rendered colors that are **used but undeclared**.
195
+
196
+ ```python
197
+ for item in result.divergence:
198
+     print(item.note, item.color.hex)  # e.g. "declared '--brand' unused in render"
199
+ ```
200
+
201
+ ### Custom tuning
202
+
203
+ [`palette_config.yaml`](src/colorsense/data/palette_config.yaml) **ships bundled with the
204
+ package** and is loaded automatically. It is the single source of truth for the **token
205
+ vocabulary** (CSS custom-property names → semantic roles → 60/30/10 palette-role priors) and
206
+ the **component-classifier** weights (how rendered elements are scored into headers, cards,
207
+ CTAs, …). The weights are calibrated starting points, not ground truth.
208
+
209
+ To tune them, copy the bundled file, edit your copy, and pass its path as `config_path=` to
210
+ `analyze` (or load it with `load_config`). To inspect the defaults programmatically:
211
+
212
+ ```python
213
+ from colorsense import load_default_config
214
+
215
+ config = load_default_config()
216
+ ```
217
+
218
+ `config_path=` tunes the token vocabulary and the component classifier. The usage-side
219
+ role-scoring weights are documented in-code constants in
220
+ [`palette/roles.py`](src/colorsense/palette/roles.py) (e.g. `W_AREA`, `SOFTMAX_T`,
221
+ `TARGET_SPLIT`), not part of the YAML.
222
+
223
+ ## Development
224
+
225
+ ```bash
226
+ uv run ruff check .
227
+ uv run ruff format --check .
228
+ uv run mypy src
229
+ uv run pytest
230
+ ```
231
+
232
+ Tests are network-free: live-page work runs against saved fixture HTML under
233
+ `tests/fixtures/` served via `file://`. Integration tests in
234
+ [`tests/test_integration_sites.py`](tests/test_integration_sites.py) pin golden snapshots of
235
+ the analysis; regenerate them after an intentional change with:
236
+
237
+ ```bash
238
+ UPDATE_GOLDEN=1 uv run pytest tests/test_integration_sites.py
239
+ ```
@@ -0,0 +1,209 @@
1
+ # colorsense
2
+
3
+ Extract the rendered color palette from any website and return a structured, typed result
4
+ intended for downstream consumers — including AI models — that need to understand a site's
5
+ color identity (for example, to derive their own theme-matched colors).
6
+
7
+ colorsense renders a page under light (and, on request, dark) color schemes, harvests its
8
+ design tokens and computed element colors, classifies them into a 60/30/10 palette, and
9
+ reconciles what the site *declares* (CSS custom properties) against what it actually *uses*.
10
+ It returns the palette roles and scoring; producing concrete color choices for a given
11
+ widget is left to the consumer.
12
+
13
+ ## Install
14
+
15
+ ```bash
16
+ pip install colorsense
17
+ playwright install chromium
18
+ ```
19
+
20
+ Rendering uses a headless Chromium via Playwright. The browser binary is **not** a Python
21
+ package, so it cannot be pulled in as a pip dependency — run `playwright install chromium`
22
+ once after installing to download it (and `playwright install-deps chromium` on Linux to
23
+ pull the OS libraries Chromium needs).
24
+
25
+ For development from a checkout, use [uv](https://docs.astral.sh/uv/):
26
+
27
+ ```bash
28
+ uv sync
29
+ uv run playwright install chromium
30
+ ```
31
+
32
+ ## Quickstart
33
+
34
+ `analyze` is **async-native** (it uses Playwright's async API and renders the
35
+ themes concurrently), so await it from an event loop. The result's `themes` field maps each color
36
+ scheme to its **60/30/10 palette** — five roles, each with ranked, scored candidate colors:
37
+
38
+ ```python
39
+ import asyncio
40
+ from colorsense import analyze, PaletteRole
41
+
42
+ result = asyncio.run(analyze("https://example.com"))
43
+
44
+ for theme, palette in result.themes.items():
45
+     # mapping always contains every role; () when none was detected
46
+     candidates = palette.roles.mapping[PaletteRole.primary]
47
+     if candidates:
48
+         primary = candidates[0]  # top candidate for the role
49
+         print(theme, primary.color.hex, primary.probability)
50
+ ```
51
+
52
+ Each role — `primary`, `secondary`, `accent`, `neutral_light`, `neutral_dark` — maps to a
53
+ probability-ranked tuple of candidates. Take `[0]` for the best pick.
54
+
55
+ Inside an async application (e.g. a FastAPI `async def` endpoint) just
56
+ `result = await analyze(url)` directly — no threadpool hop required.
57
+
58
+ ## The result
59
+
60
+ `analyze` returns a fully typed `AnalysisResult` (a Pydantic model —
61
+ `result.model_dump_json()` round-trips). The fields most consumers use:
62
+
63
+ **`themes`** — the payload: each `Theme` mapped to its reconciled palette `roles`. Index
64
+ `palette.roles.mapping[role]` to get a tuple of candidates, where each candidate carries:
65
+
66
+ - `color` — a `Color`: an sRGB `hex` string plus cached **OKLCH** coordinates (`lightness`,
67
+ `chroma`, `hue`) of the composited color, and the source `alpha`. `hex` is what you paint
68
+ with; the OKLCH coordinates make it easy to derive your own theme-matched colors — sort by
69
+ perceptual lightness, build accessible tints/shades, or compute contrast — without
70
+ re-parsing the hex.
71
+ - `probability` — confidence this color fills the role (candidates within a role rank by it).
72
+ - `area` — the fraction of page area the color covers, i.e. its 60/30/10 dominance.
73
+
74
+ Sites that ignore `prefers-color-scheme` (near-identical light/dark renders) collapse to a
75
+ single reported theme.
76
+
77
+ **`fit_score`** — how well the measured palette matches the canonical 60/30/10 split, in
78
+ `[0, 1]`. A quick quality signal for the analysis as a whole.
79
+
80
+ **`status_colors`** — success/error/warning colors detected and deliberately **kept out** of
81
+ the palette, so a red error banner doesn't masquerade as a brand accent.
82
+
83
+ **`metadata`** — a typed `RunMetadata`: which themes were requested versus actually analyzed,
84
+ whether the run collapsed to a single theme, and the fetch policy in effect. Useful for
85
+ logging and for detecting the single-theme collapse.
86
+
87
+ ## Options
88
+
89
+ ```python
90
+ import asyncio
91
+ from colorsense import analyze, LIGHT_AND_DARK, PolitenessPolicy, Viewport
92
+
93
+ result = asyncio.run(
94
+ analyze(
95
+ "https://example.com",
96
+ viewport=Viewport(width=1440, height=900, device_scale_factor=2.0),
97
+ themes=LIGHT_AND_DARK, # opt in to dark mode; default is light only
98
+ politeness=PolitenessPolicy(min_interval=2.0), # see "Fetching responsibly" below
99
+ config_path="my_palette_config.yaml", # advanced; see "Custom tuning" below
100
+ )
101
+ )
102
+ ```
103
+
104
+ By default `analyze` renders **light mode only** — most sites have no dark mode, and a
105
+ second theme roughly doubles the render cost. Pass `themes=LIGHT_AND_DARK` (equivalently
106
+ `themes=(Theme.light, Theme.dark)`) to also analyze dark mode; near-identical light/dark
107
+ renders are collapsed back to a single reported theme. A custom `viewport` captures a
108
+ different layout (e.g. mobile), which can yield a different palette.
109
+
110
+ ## Fetching responsibly: politeness, authorization & security
111
+
112
+ colorsense fetches and renders a third-party page. **Authorization is the consumer's
113
+ responsibility** — the library provides *mechanism, not policy*. `PolitenessPolicy`
114
+ (in [`net/politeness.py`](src/colorsense/net/politeness.py)) gives you the controls:
115
+
116
+ - a configurable, identifiable **User-Agent**;
117
+ - a **`robots.txt` gate**, on by default (`respect_robots=True`) — a disallow raises
118
+ `RobotsDisallowedError`;
119
+ - a per-host **rate limiter** (`min_interval` seconds between same-host fetches);
120
+ - a simple URL→render **cache**.
121
+
122
+ Choose your posture by where colorsense runs:
123
+
124
+ - **Server-side / batch** (you analyze sites you operate or are authorized to crawl): keep
125
+ `respect_robots=True`, set a conservative `min_interval`, and use an identifiable
126
+ User-Agent so site operators can contact you.
127
+
128
+ ```python
129
+ policy = PolitenessPolicy(
130
+ user_agent="MyApp/1.0 (+https://myapp.example/bot)",
131
+ min_interval=2.0,
132
+ )
133
+ ```
134
+
135
+ - **Embedded / on-demand** (a user pastes a URL into your product to theme a widget): you
136
+ may legitimately analyze a page the user is entitled to view. You still own the decision
137
+ to fetch — gate it on your own authorization, terms of service, and rate limits *before*
138
+ calling `analyze`. Disabling `respect_robots` is an explicit, accountable choice, not a
139
+ default.
140
+
141
+ colorsense never decides whether a fetch is permitted; it only makes it easy to fetch
142
+ considerately once you have decided.
143
+
144
+ **Security (SSRF + local-file reads).** `analyze` fetches and renders whatever URL it is
145
+ given, so passing **untrusted** URLs exposes a server-side request forgery and local-file-read
146
+ surface. `file://` URLs read arbitrary local files (intentional, for the test fixtures), and
147
+ `http(s)://` URLs can reach internal hosts and cloud metadata endpoints (e.g.
148
+ `169.254.169.254`, `localhost`). This is by design — the politeness controls above gate
149
+ *network* schemes for robots/rate-limiting, but nothing validates the destination host. If
150
+ you accept user-supplied URLs, validate the scheme and host **before** calling `analyze`:
151
+ allowlist public hosts, and reject `file://` and private / link-local IP ranges. As above,
152
+ this is the consumer's responsibility — the library provides mechanism, not policy.
153
+
154
+ ## Advanced
155
+
156
+ ### Design-token auditing
157
+
158
+ Beyond the palette, `analyze` reports what the site's CSS **declares** versus what it
159
+ actually **renders** — useful for auditing a design system you own:
160
+
161
+ - **`tokens`** — the declared design tokens (CSS custom properties) with their inferred
162
+ semantic roles (e.g. `--accent-500` read as `brand_accent`), for the primary theme.
163
+ - **`divergence`** — discrepancies between intent and usage: brand colors **declared but
164
+ unused** in the render, and prominent rendered colors that are **used but undeclared**.
165
+
166
+ ```python
167
+ for item in result.divergence:
168
+     print(item.note, item.color.hex)  # e.g. "declared '--brand' unused in render"
169
+ ```
170
+
171
+ ### Custom tuning
172
+
173
+ [`palette_config.yaml`](src/colorsense/data/palette_config.yaml) **ships bundled with the
174
+ package** and is loaded automatically. It is the single source of truth for the **token
175
+ vocabulary** (CSS custom-property names → semantic roles → 60/30/10 palette-role priors) and
176
+ the **component-classifier** weights (how rendered elements are scored into headers, cards,
177
+ CTAs, …). The weights are calibrated starting points, not ground truth.
178
+
179
+ To tune them, copy the bundled file, edit your copy, and pass its path as `config_path=` to
180
+ `analyze` (or load it with `load_config`). To inspect the defaults programmatically:
181
+
182
+ ```python
183
+ from colorsense import load_default_config
184
+
185
+ config = load_default_config()
186
+ ```
187
+
188
+ `config_path=` tunes the token vocabulary and the component classifier. The usage-side
189
+ role-scoring weights are documented in-code constants in
190
+ [`palette/roles.py`](src/colorsense/palette/roles.py) (e.g. `W_AREA`, `SOFTMAX_T`,
191
+ `TARGET_SPLIT`), not part of the YAML.
192
+
193
+ ## Development
194
+
195
+ ```bash
196
+ uv run ruff check .
197
+ uv run ruff format --check .
198
+ uv run mypy src
199
+ uv run pytest
200
+ ```
201
+
202
+ Tests are network-free: live-page work runs against saved fixture HTML under
203
+ `tests/fixtures/` served via `file://`. Integration tests in
204
+ [`tests/test_integration_sites.py`](tests/test_integration_sites.py) pin golden snapshots of
205
+ the analysis; regenerate them after an intentional change with:
206
+
207
+ ```bash
208
+ UPDATE_GOLDEN=1 uv run pytest tests/test_integration_sites.py
209
+ ```