gmeow 1.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (91) hide show
  1. gmeow-1.0.0/.gitignore +39 -0
  2. gmeow-1.0.0/CODE_OF_CONDUCT.md +61 -0
  3. gmeow-1.0.0/CONTRIBUTING.md +189 -0
  4. gmeow-1.0.0/LICENSE +9 -0
  5. gmeow-1.0.0/PKG-INFO +215 -0
  6. gmeow-1.0.0/README.md +170 -0
  7. gmeow-1.0.0/SECURITY.md +91 -0
  8. gmeow-1.0.0/alembic.ini +38 -0
  9. gmeow-1.0.0/config.toml-example +75 -0
  10. gmeow-1.0.0/docs/BRAND.md +35 -0
  11. gmeow-1.0.0/docs/POSTGRES_PARTITION_READINESS.md +23 -0
  12. gmeow-1.0.0/docs/PUBLIC_RELEASE_CHECKLIST.md +18 -0
  13. gmeow-1.0.0/docs/SOURCE_DOCS.md +22 -0
  14. gmeow-1.0.0/docs/gmeow-logo.svg +120 -0
  15. gmeow-1.0.0/docs/social-preview.png +0 -0
  16. gmeow-1.0.0/docs/social-preview.svg +61 -0
  17. gmeow-1.0.0/docs/systemd.md +38 -0
  18. gmeow-1.0.0/main.py +13 -0
  19. gmeow-1.0.0/migrations/MODULE.md +20 -0
  20. gmeow-1.0.0/migrations/__init__.py +12 -0
  21. gmeow-1.0.0/migrations/env.py +51 -0
  22. gmeow-1.0.0/migrations/script.py.mako +26 -0
  23. gmeow-1.0.0/migrations/versions/20260523_0001_postgres_storage.py +386 -0
  24. gmeow-1.0.0/migrations/versions/20260523_0002_complete_sql_comments.py +81 -0
  25. gmeow-1.0.0/migrations/versions/20260523_0003_comment_all_columns.py +90 -0
  26. gmeow-1.0.0/migrations/versions/20260523_0004_comment_constraints.py +86 -0
  27. gmeow-1.0.0/migrations/versions/20260523_0005_ingest_issue_indexes.py +32 -0
  28. gmeow-1.0.0/migrations/versions/20260523_0006_scale_indexes_and_maintenance.py +83 -0
  29. gmeow-1.0.0/migrations/versions/20260523_0007_attachment_rows_and_search_fixes.py +72 -0
  30. gmeow-1.0.0/migrations/versions/20260523_0008_scoped_hnsw_indexes.py +69 -0
  31. gmeow-1.0.0/migrations/versions/20260523_0009_message_search_columns.py +72 -0
  32. gmeow-1.0.0/migrations/versions/20260523_0010_graph_node_profiles.py +73 -0
  33. gmeow-1.0.0/migrations/versions/20260523_0011_content_object_refs.py +68 -0
  34. gmeow-1.0.0/migrations/versions/20260523_0012_summary_items.py +81 -0
  35. gmeow-1.0.0/migrations/versions/20260523_0013_graph_edge_stats.py +70 -0
  36. gmeow-1.0.0/migrations/versions/20260523_0014_timeline_views.py +96 -0
  37. gmeow-1.0.0/migrations/versions/20260523_0015_materialized_summary_views.py +191 -0
  38. gmeow-1.0.0/migrations/versions/20260523_0016_resilience_recovery.py +164 -0
  39. gmeow-1.0.0/migrations/versions/20260523_0017_archive_secondary_mail.py +230 -0
  40. gmeow-1.0.0/migrations/versions/MODULE.md +14 -0
  41. gmeow-1.0.0/migrations/versions/__init__.py +13 -0
  42. gmeow-1.0.0/pyproject.toml +90 -0
  43. gmeow-1.0.0/scripts/MODULE.md +18 -0
  44. gmeow-1.0.0/scripts/__init__.py +12 -0
  45. gmeow-1.0.0/scripts/public_release_check.py +97 -0
  46. gmeow-1.0.0/src/gmeow/MODULE.md +69 -0
  47. gmeow-1.0.0/src/gmeow/__init__.py +16 -0
  48. gmeow-1.0.0/src/gmeow/_typing.py +23 -0
  49. gmeow-1.0.0/src/gmeow/app.py +980 -0
  50. gmeow-1.0.0/src/gmeow/attachment_analysis.py +401 -0
  51. gmeow-1.0.0/src/gmeow/cache.py +530 -0
  52. gmeow-1.0.0/src/gmeow/categories.py +397 -0
  53. gmeow-1.0.0/src/gmeow/cli.py +600 -0
  54. gmeow-1.0.0/src/gmeow/config.py +426 -0
  55. gmeow-1.0.0/src/gmeow/db.py +32 -0
  56. gmeow-1.0.0/src/gmeow/gmail.py +277 -0
  57. gmeow-1.0.0/src/gmeow/gmail_actions.py +59 -0
  58. gmeow-1.0.0/src/gmeow/graph.py +481 -0
  59. gmeow-1.0.0/src/gmeow/headers.py +71 -0
  60. gmeow-1.0.0/src/gmeow/http_json.py +56 -0
  61. gmeow-1.0.0/src/gmeow/imap_server.py +265 -0
  62. gmeow-1.0.0/src/gmeow/intelligence.py +112 -0
  63. gmeow-1.0.0/src/gmeow/kg.py +473 -0
  64. gmeow-1.0.0/src/gmeow/maintenance.py +391 -0
  65. gmeow-1.0.0/src/gmeow/markdown.py +106 -0
  66. gmeow-1.0.0/src/gmeow/mcp_server.py +950 -0
  67. gmeow-1.0.0/src/gmeow/metadata.py +40 -0
  68. gmeow-1.0.0/src/gmeow/object_store.py +270 -0
  69. gmeow-1.0.0/src/gmeow/parser.py +122 -0
  70. gmeow-1.0.0/src/gmeow/pg_cache.py +3081 -0
  71. gmeow-1.0.0/src/gmeow/pg_cache_archive.py +341 -0
  72. gmeow-1.0.0/src/gmeow/pg_cache_helpers.py +420 -0
  73. gmeow-1.0.0/src/gmeow/pg_cache_imap.py +210 -0
  74. gmeow-1.0.0/src/gmeow/pg_cache_jobs.py +526 -0
  75. gmeow-1.0.0/src/gmeow/protocols.py +249 -0
  76. gmeow-1.0.0/src/gmeow/provision.py +45 -0
  77. gmeow-1.0.0/src/gmeow/resilience.py +197 -0
  78. gmeow-1.0.0/src/gmeow/semantic.py +108 -0
  79. gmeow-1.0.0/src/gmeow/semantic_pg.py +140 -0
  80. gmeow-1.0.0/src/gmeow/sync.py +837 -0
  81. gmeow-1.0.0/src/gmeow/sync_backfill.py +124 -0
  82. gmeow-1.0.0/src/gmeow/text_index.py +75 -0
  83. gmeow-1.0.0/src/gmeow/toon.py +91 -0
  84. gmeow-1.0.0/tests/MODULE.md +6 -0
  85. gmeow-1.0.0/tests/__init__.py +11 -0
  86. gmeow-1.0.0/tests/_test_config.py +25 -0
  87. gmeow-1.0.0/tests/conftest.py +10 -0
  88. gmeow-1.0.0/tests/test_api.py +49 -0
  89. gmeow-1.0.0/tests/test_core.py +1151 -0
  90. gmeow-1.0.0/tests/test_gmail_actions.py +81 -0
  91. gmeow-1.0.0/tests/test_sync_backfill.py +94 -0
gmeow-1.0.0/.gitignore ADDED
@@ -0,0 +1,39 @@
1
+ __pycache__/
2
+ *.py[cod]
3
+ .pytest_cache/
4
+ .ruff_cache/
5
+ .venv/
6
+ dist/
7
+ build/
8
+ data/
9
+ *.egg-info/
10
+ .env
11
+ .env.*
12
+ config.toml
13
+ config/*.local.*
14
+ config/gmeow.yaml
15
+ config/*.secret.*
16
+ **/service-account*.json
17
+ **/*service_account*.json
18
+ **/imap-password
19
+ exports/
20
+ .mypy_cache/
21
+ .coverage
22
+ htmlcov/
23
+
24
+ # coding-ethos generated runtime output
25
+ .code-ethos/cache/
26
+ .coding-ethos/cache/
27
+ .coding-ethos/code-intel.db
28
+ .coding-ethos/hook-runs/
29
+ .coding-ethos/lint-runs/
30
+ .coding-ethos/prune-runs/
31
+ .coding-ethos/state/
32
+ .coding-ethos/code-intel.*
33
+
34
+ .coding-ethos/code-intel.db-shm
35
+ .coding-ethos/code-intel.db-wal
36
+
37
+ .coding-ethos/code-intel.duckdb
38
+ .coding-ethos/code-intel.duckdb.wal
39
+ .coding-ethos/events/
gmeow-1.0.0/CODE_OF_CONDUCT.md ADDED
@@ -0,0 +1,61 @@
1
+ <!-- SPDX-FileCopyrightText: 2026 Blackcat Informatics Inc. -->
2
+ <!-- SPDX-License-Identifier: MIT -->
3
+
4
+ # Gmeow Contributor Code of Conduct
5
+
6
+ ## Purpose
7
+
8
+ Gmeow is a professional open source project stewarded by Blackcat Informatics
9
+ Inc. We want collaboration here to stay technical, respectful, privacy-aware,
10
+ and productive.
11
+
12
+ ## Expectations
13
+
14
+ We ask contributors to:
15
+
16
+ - stay technical and focus discussions on code, docs, testing, security,
17
+ privacy, and design
18
+ - be respectful and critique ideas rather than people
19
+ - assume good intent and ask clarifying questions before escalating
20
+ - keep feedback specific, actionable, and professional
21
+ - avoid posting private mailbox data, credentials, personal information, or
22
+ attachment contents in project spaces
23
+ - help maintain a working environment where contributors can do high-quality
24
+ engineering work
25
+
26
+ ## Unacceptable Behavior
27
+
28
+ The following is not acceptable in project spaces such as issues, pull requests,
29
+ discussions, chats, or events:
30
+
31
+ - harassment, discrimination, or threats
32
+ - personal attacks, intimidation, or bullying
33
+ - sustained disruption that derails collaboration
34
+ - publishing private information without consent
35
+ - posting mailbox data, secrets, access tokens, or private attachment content
36
+ - coordinated bad-faith participation, trolling, or sealioning
37
+
38
+ ## Enforcement
39
+
40
+ Project maintainers may take any action they deem appropriate, including
41
+ warnings, requests to disengage, comment moderation, removal of sensitive
42
+ content, or bans from project spaces. Decisions are final.
43
+
44
+ ## Reporting
45
+
46
+ If you experience or witness behavior that violates this Code of Conduct:
47
+
48
+ - email <conduct@blackcat.ca>
49
+ - include links, context, and any relevant screenshots or logs
50
+ - avoid public escalation while a report is being reviewed
51
+
52
+ Reports are handled confidentially and will be acknowledged within two business
53
+ days.
54
+
55
+ Security vulnerabilities should be reported through [SECURITY.md](SECURITY.md)
56
+ rather than the conduct process.
57
+
58
+ ## Attribution
59
+
60
+ This policy is adapted from common open source codes of conduct and tailored for
61
+ the Gmeow project.
gmeow-1.0.0/CONTRIBUTING.md ADDED
@@ -0,0 +1,189 @@
1
+ <!-- SPDX-FileCopyrightText: 2026 Blackcat Informatics Inc. -->
2
+ <!-- SPDX-License-Identifier: MIT -->
3
+
4
+ # Contributing to Gmeow
5
+
6
+ Gmeow is an open source local Gmail intelligence, archive, REST, MCP, and
7
+ read-only IMAP service maintained by Blackcat Informatics Inc. Contributions to
8
+ code, tests, documentation, migrations, and release tooling are welcome.
9
+
10
+ ## Code of Conduct
11
+
12
+ Please read the [Code of Conduct](CODE_OF_CONDUCT.md) before participating. We
13
+ expect respectful, professional collaboration. To report unacceptable behavior,
14
+ email <conduct@blackcat.ca>.
15
+
16
+ ## Security and Privacy First
17
+
18
+ Gmeow works with private mailbox data. Treat privacy as part of the engineering
19
+ contract.
20
+
21
+ - Do not commit `config.toml`, credentials, mailbox exports, object-store data,
22
+ database dumps, SOPS files, or runtime data.
23
+ - Use synthetic fixtures in tests and examples.
24
+ - Do not paste real Gmail messages, access tokens, service-account keys,
25
+ database passwords, or private attachment content into issues or pull
26
+ requests.
27
+ - Report vulnerabilities through [SECURITY.md](SECURITY.md), not public issues.
28
+
29
+ ## Ways to Contribute
30
+
31
+ ### Report Bugs
32
+
33
+ Before opening an issue:
34
+
35
+ - search existing issues first
36
+ - verify the problem on the latest code in `main` when possible
37
+ - reproduce against synthetic data or a disposable local database
38
+
39
+ Include:
40
+
41
+ - a clear title
42
+ - exact reproduction steps
43
+ - expected behavior
44
+ - actual behavior
45
+ - relevant command output or stack traces
46
+ - OS, Python version, PostgreSQL version, and `uv` version
47
+
48
+ ### Suggest Enhancements
49
+
50
+ Good enhancement requests usually include:
51
+
52
+ - the workflow or mailbox-management problem you are solving
53
+ - the current limitation
54
+ - the proposed behavior
55
+ - concrete examples of expected REST, MCP, CLI, or database behavior
56
+
57
+ ### Submit Pull Requests
58
+
59
+ 1. Fork the repository and create a branch from `main`.
60
+ 2. Install dependencies.
61
+ 3. Make the smallest coherent change that solves the problem.
62
+ 4. Add or update tests when behavior changes.
63
+ 5. Update README or docs when public behavior, commands, endpoints, schemas, or
64
+ outputs change.
65
+ 6. Run the verification steps below before requesting review.
66
+
67
+ ## Contributor License Agreement
68
+
69
+ External contributions may require the project Contributor License Agreement,
70
+ enforced by CLA Assistant.
71
+
72
+ The CLA link is provided by CLA Assistant on pull requests when it is required.
73
+
74
+ When CLA Assistant comments on your pull request, follow the signing link and
75
+ sign in with GitHub to accept the agreement. The CLA confirms that you have the
76
+ right to submit the contribution and grants the project the rights needed to use
77
+ and redistribute it.
78
+
79
+ Developer Certificate of Origin style sign-off trailers are welcome as
80
+ additional contribution evidence, but DCO sign-off does not replace a required
81
+ CLA Assistant check:
82
+
83
+ ```text
84
+ Signed-off-by: Your Name <you@example.com>
85
+ ```
86
+
87
+ ## Governance and Continuity
88
+
89
+ Gmeow is maintained by Blackcat Informatics Inc. Project decisions are made
90
+ through GitHub issues, pull requests, discussions, security reports, and release
91
+ reviews, using the repository quality bar, security policy, and release process
92
+ as the decision framework.
93
+
94
+ Repository owners and code owners are listed in `.github/CODEOWNERS` when that
95
+ file exists.
96
+
97
+ ## Development Setup
98
+
99
+ ### Prerequisites
100
+
101
+ - Git
102
+ - Python 3.13+
103
+ - PostgreSQL suitable for integration testing
104
+ - `uv`
105
+
106
+ ### Local Setup
107
+
108
+ ```bash
109
+ uv sync --extra test
110
+ cp config.toml-example config.toml
111
+ ```
112
+
113
+ Use a disposable database for integration testing. Do not point tests or local
114
+ development at a production mailbox database.
115
+
116
+ Example local test DSN:
117
+
118
+ ```text
119
+ postgresql://gmeow-test:gmeow-test@127.0.0.1:5432/gmeow-test
120
+ ```
121
+
122
+ ## Project-Specific Guidance
123
+
124
+ - If REST endpoint behavior changes, update API examples and tests.
125
+ - If MCP tools change, update help text and any client-facing examples.
126
+ - If CLI behavior changes, update README usage and command verification.
127
+ - If database schema changes, add or update Alembic migrations and migration
128
+ comments.
129
+ - If attachment analysis behavior changes, cover privacy and hostile-file
130
+ handling risks in tests or docs.
131
+ - If archive, retention, or object-store behavior changes, verify data-loss and
132
+ restore paths explicitly.
133
+
134
+ ## Coding Style
135
+
136
+ Contributions must pass the project lint, formatting, and test gates.
137
+
138
+ - Python follows PEP 8 plus the stricter Ruff configuration in this repository.
139
+ - Type hints are expected for public functions and non-trivial helpers.
140
+ - Prefer structural fixes over lint suppressions.
141
+ - Keep changes focused; avoid unrelated refactors.
142
+ - Use standard library or existing project helpers before adding dependencies.
143
+
144
+ ## Verification
145
+
146
+ Before requesting review, run the checks relevant to your change:
147
+
148
+ ```bash
149
+ uvx ruff format --config ruff.toml main.py migrations scripts src
150
+ uvx ruff check --config ruff.toml main.py migrations scripts src
151
+ uv run python -m compileall main.py src migrations tests
152
+ uv run pytest
153
+ uv build
154
+ uv run python scripts/public_release_check.py
155
+ ```
156
+
157
+ For database-backed changes, also run against a disposable local PostgreSQL
158
+ database and document the DSN shape used, without committing private
159
+ credentials.
160
+
161
+ ## Commit Messages
162
+
163
+ We prefer Conventional Commits:
164
+
165
+ - `feat:` new functionality
166
+ - `fix:` bug fixes
167
+ - `docs:` documentation-only changes
168
+ - `refactor:` internal restructuring without behavior change
169
+ - `test:` test additions or updates
170
+ - `chore:` maintenance work
171
+
172
+ Examples:
173
+
174
+ ```text
175
+ feat: add archive verification endpoint
176
+ fix: preserve raw RFC822 during retention preview
177
+ docs: clarify local database setup
178
+ test: cover category exclusion in text search
179
+ ```
180
+
181
+ ## Questions
182
+
183
+ For public questions, open an issue or discussion in the repository. For private
184
+ matters, email <oss@blackcat.ca>.
185
+
186
+ ## License
187
+
188
+ By contributing, you agree that your contributions will be licensed under the
189
+ project license defined in [LICENSE](LICENSE).
gmeow-1.0.0/LICENSE ADDED
@@ -0,0 +1,9 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Blackcat Informatics® Inc.
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
6
+
7
+ The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
8
+
9
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
gmeow-1.0.0/PKG-INFO ADDED
@@ -0,0 +1,215 @@
1
+ Metadata-Version: 2.4
2
+ Name: gmeow
3
+ Version: 1.0.0
4
+ Summary: Local Gmail intelligence server for agents, MCP, and REST automation
5
+ Project-URL: Homepage, https://github.com/blackcat-informatics/gmeow
6
+ Project-URL: Repository, https://github.com/blackcat-informatics/gmeow
7
+ Project-URL: Issues, https://github.com/blackcat-informatics/gmeow/issues
8
+ Project-URL: PyPI, https://pypi.org/project/gmeow/
9
+ Author: Blackcat Informatics Inc.
10
+ License-Expression: MIT
11
+ License-File: LICENSE
12
+ Keywords: agents,archive,email,gmail,knowledge-graph,mcp,semantic-search
13
+ Classifier: Development Status :: 3 - Alpha
14
+ Classifier: Framework :: FastAPI
15
+ Classifier: Intended Audience :: Developers
16
+ Classifier: License :: OSI Approved :: MIT License
17
+ Classifier: Programming Language :: Python :: 3
18
+ Classifier: Programming Language :: Python :: 3.13
19
+ Classifier: Topic :: Communications :: Email
20
+ Classifier: Topic :: Database
21
+ Classifier: Topic :: Internet :: WWW/HTTP :: HTTP Servers
22
+ Requires-Python: >=3.13
23
+ Requires-Dist: alembic>=1.18.4
24
+ Requires-Dist: blake3>=1.0
25
+ Requires-Dist: en-core-web-sm
26
+ Requires-Dist: fastapi>=0.115
27
+ Requires-Dist: google-api-python-client>=2.150
28
+ Requires-Dist: google-auth-httplib2>=0.2
29
+ Requires-Dist: google-auth>=2.36
30
+ Requires-Dist: mcp>=1.8
31
+ Requires-Dist: pgvector>=0.3
32
+ Requires-Dist: psycopg[binary]>=3.2
33
+ Requires-Dist: pydantic>=2.10
34
+ Requires-Dist: rustworkx>=0.15
35
+ Requires-Dist: scikit-learn>=1.8.0
36
+ Requires-Dist: semchunk>=4.0.0
37
+ Requires-Dist: spacy>=3.8.14
38
+ Requires-Dist: tantivy>=0.22
39
+ Requires-Dist: uvicorn[standard]>=0.32
40
+ Requires-Dist: zstandard>=0.23
41
+ Provides-Extra: test
42
+ Requires-Dist: httpx>=0.27; extra == 'test'
43
+ Requires-Dist: pytest>=8.3; extra == 'test'
44
+ Description-Content-Type: text/markdown
45
+
46
+ <p align="center">
47
+ <img src="https://raw.githubusercontent.com/blackcat-informatics/gmeow/main/docs/gmeow-logo.svg" alt="Gmeow logo" width="280">
48
+ </p>
49
+
50
+ # Gmeow
51
+
52
+ Local Gmail intelligence for agents.
53
+
54
+ Gmeow turns a Gmail mailbox into a local intelligence layer for agents and automation. It exposes a loopback REST API and an MCP Streamable HTTP endpoint over an authenticated Gmail mailbox, while maintaining a local PostgreSQL-backed cache, semantic index, attachment object store, and knowledge graph.
55
+
56
+ Gmeow is designed for trusted single-user local systems. By default it binds to `127.0.0.1` and does not add application-level authentication. Do not expose it directly to an untrusted network.
57
+
58
+ ## What Gmeow Does
59
+
60
+ - Gives agents a local MCP and REST interface to search, read, and act on Gmail without scraping a browser.
61
+ - Builds a durable local archive with raw RFC822 messages, attachment payloads, labels, threads, sync state, and search indexes.
62
+ - Adds semantic search, category discovery, and knowledge-graph views over mailbox content.
63
+ - Extracts attachment metadata and text so documents, images, archives, PDFs, and calendar files become searchable context.
64
+ - Serves the cached archive through read-only IMAP for tools that already speak mail protocols.
65
+
66
+ ## Features
67
+
68
+ - Gmail mailbox access through Google Workspace service-account delegation or local user OAuth.
69
+ - REST and MCP tools for search, reads, labels, archive/read/star state, contacts, categories, graph exploration, and archive operations.
70
+ - PostgreSQL catalog for labels, threads, headers, MIME structure, categories, graph triples, jobs, sync state, and pgvector embedding chunks.
71
+ - Live Gmail search hydration: unbounded searches can query Gmail, cache returned messages, and enqueue analysis work.
72
+ - Full local object storage for payload bytes using a BLAKE3 content-addressed store with zstd compression for compressible content.
73
+ - Attachment sidecars with Gmail source metadata, `exiftool` metadata, extracted text, OCR, archive listings, document conversion, and optional vision captions.
74
+ - Semantic search using `semchunk` chunking and an OpenAI-compatible embedding endpoint.
75
+ - Knowledge graph extraction with RDF/RDFS, FOAF, SIOC, schema.org, SKOS, PROV-O, and DOAP alignment.
76
+ - Rustworkx graph projection, paths, ranking, centrality, components, project views, and related-node discovery.
77
+ - Category rules plus learned category suggestions from TF-IDF clustering.
78
+ - Timed maintenance jobs for history sync, priority sync, intelligence workers, derived views, PostgreSQL analyze, and optional sidecar refresh.
79
+ - Token-Oriented Object Notation (TOON) by default for MCP responses, with JSON available on request.
80
+ - Read-only IMAP service backed by cached RFC822 archive objects.
81
+
82
+ ## Install
83
+
84
+ ```bash
85
+ uv sync --extra test
86
+ cp config.toml-example config.toml
87
+ ```
88
+
89
+ Edit `config.toml`. All Gmeow settings live under `[gmeow]` so this file can be shared with other Google proxy/archive applications.
90
+
91
+ ```toml
92
+ [gmeow]
93
+ subject = "user@example.com"
94
+ service_account_file = "data/secrets/service-account.json"
95
+ postgres_dsn = "postgresql://gmeow:change-me@127.0.0.1:5432/gmeow"
96
+ embedding_endpoint = "http://127.0.0.1:8090/v1/embeddings"
97
+
98
+ [gmeow.secrets]
99
+ file = "~/.config/gmeow/secrets.sops.yaml"
100
+ unlock_key = "replace-with-local-unlock-key"
101
+ age_key = "AGE-SECRET-KEY-REPLACE-WITH-LOCAL-SOPS-AGE-IDENTITY"
102
+ ```
103
+
104
+ When `[gmeow.secrets]` is configured, Gmeow decrypts the SOPS YAML file at startup and uses `postgres_dsn`, `service_account_json`, `user_credentials_json`, and `imap_password` from that single encrypted file. Configured credential file paths remain explicit fallback paths for local deployments that do not use SOPS.
105
+
106
+ Generate Google Cloud service-account provisioning commands:
107
+
108
+ ```bash
109
+ uv run gmeow provision-plan YOUR_PROJECT_ID
110
+ ```
111
+
112
+ Authorize the generated service-account OAuth client ID in Google Admin Console for:
113
+
114
+ ```text
115
+ https://www.googleapis.com/auth/gmail.modify
116
+ ```
117
+
118
+ As a local fallback, set `auth_mode = "user_oauth"` under `[gmeow]` and create Gmail-scoped Application Default Credentials (ADC):
119
+
120
+ ```bash
121
+ gcloud auth application-default login --scopes=https://www.googleapis.com/auth/gmail.modify,https://www.googleapis.com/auth/cloud-platform
122
+ ```
123
+
124
+ ## Run
125
+
126
+ ```bash
127
+ uv run gmeow serve --host 127.0.0.1 --port 8765
128
+ ```
129
+
130
+ REST is available under `/api/v1`. MCP is mounted at `/mcp` when the installed MCP SDK provides an ASGI app.
131
+
132
+ For persistent local operation under your user account, see [docs/systemd.md](docs/systemd.md).
133
+
134
+ Useful commands:
135
+
136
+ ```bash
137
+ uv run gmeow status
138
+ uv run gmeow doctor
139
+ uv run gmeow sync
140
+ uv run gmeow sync-history
141
+ uv run gmeow refresh-attachment-sidecars
142
+ uv run gmeow enqueue-intelligence
143
+ uv run gmeow run-intelligence-worker
144
+ uv run gmeow rebuild-intelligence
145
+ uv run gmeow discover-categories --since-hours 48
146
+ uv run gmeow seed-categories
147
+ uv run gmeow recategorize
148
+ uv run gmeow category-stats
149
+ ```
150
+
151
+ `gmeow serve` starts timed maintenance tasks from `[gmeow.maintenance]`. Use `GET /api/v1/maintenance/timed` for scheduler state and `POST /api/v1/maintenance/timed/{task_name}/run` to run one task immediately.
152
+
153
+ ## MCP
154
+
155
+ Most MCP read/search tools return compact TOON by default. Pass `format: "json"` when an agent needs JSON-shaped results.
156
+
157
+ Example local agent configuration:
158
+
159
+ ```json
160
+ {
161
+ "mcpServers": {
162
+ "gmeow": {
163
+ "type": "http",
164
+ "url": "http://127.0.0.1:8765/mcp"
165
+ }
166
+ }
167
+ }
168
+ ```
169
+
170
+ Useful MCP tools include text/semantic/hybrid search, attachment text search, message/thread reads, attachment metadata, contacts/people, category tools, graph search/path/rank/project tools, status/help, history sync, priority sync, and limited mailbox actions.
171
+
172
+ ## Local Data
173
+
174
+ By default, Gmeow keeps catalog data in PostgreSQL and file-based data under the git-ignored `data/` directory:
175
+
176
+ - PostgreSQL stores labels, threads, message headers/metadata, MIME structure, sync state, categories, graph triples, async jobs, and pgvector embedding chunks. Schema changes are versioned through Alembic migrations under `migrations/`.
177
+ - `data/objects/blake3/aa/bb/<digest>[.zst]` stores canonical payload bytes in the BLAKE3 content-addressed store.
178
+ - `data/objects/sidecars/aa/bb/<digest>.json` stores attachment sidecar metadata, source metadata, extracted text, and external enrichment.
179
+ - `data/tantivy/` stores the local lexical search index.
180
+ - `data/secrets/` stores local credential files only when you choose file-based credentials instead of SOPS.
181
+
182
+ ## Archive and IMAP
183
+
184
+ Archive-complete messages require both Gmail full JSON and canonical raw RFC822 bytes. New Gmail hydrations fetch RFC822 automatically. Existing cached messages can be completed in bounded batches:
185
+
186
+ ```bash
187
+ uv run gmeow complete-archive --limit 25
188
+ uv run gmeow archive-status
189
+ uv run gmeow verify-objects
190
+ ```
191
+
192
+ Read-only IMAP is available as a loopback service. It exposes Gmail labels as folders, assigns stable per-folder UIDs, serves RFC822 from the content-addressed store (CAS), and rejects mutating IMAP commands.
193
+
194
+ ```bash
195
+ printf 'choose-a-local-password\n' > data/secrets/imap-password
196
+ uv run gmeow serve-imap --host 127.0.0.1 --port 1143
197
+ ```
198
+
199
+ ## Development
200
+
201
+ ```bash
202
+ uv sync --extra test
203
+ uv run python -m compileall main.py src migrations tests
204
+ uv run pytest
205
+ uv build
206
+ uv run python scripts/public_release_check.py
207
+ ```
208
+
209
+ PostgreSQL-backed integration tests are skipped unless `GMEOW_TEST_POSTGRES_DSN` points at a disposable test database. Tests must not use a production database.
210
+
211
+ Before publishing, run the checklist in `docs/PUBLIC_RELEASE_CHECKLIST.md`.
212
+
213
+ ## License
214
+
215
+ Gmeow is licensed under the MIT License. See `LICENSE`.
gmeow-1.0.0/README.md ADDED
@@ -0,0 +1,170 @@
1
+ <p align="center">
2
+ <img src="https://raw.githubusercontent.com/blackcat-informatics/gmeow/main/docs/gmeow-logo.svg" alt="Gmeow logo" width="280">
3
+ </p>
4
+
5
+ # Gmeow
6
+
7
+ Local Gmail intelligence for agents.
8
+
9
+ Gmeow turns a Gmail mailbox into a local intelligence layer for agents and automation. It exposes a loopback REST API and an MCP Streamable HTTP endpoint over an authenticated Gmail mailbox, while maintaining a local PostgreSQL-backed cache, semantic index, attachment object store, and knowledge graph.
10
+
11
+ Gmeow is designed for trusted single-user local systems. By default it binds to `127.0.0.1` and does not add application-level authentication. Do not expose it directly to an untrusted network.
12
+
13
+ ## What Gmeow Does
14
+
15
+ - Gives agents a local MCP and REST interface to search, read, and act on Gmail without scraping a browser.
16
+ - Builds a durable local archive with raw RFC822 messages, attachment payloads, labels, threads, sync state, and search indexes.
17
+ - Adds semantic search, category discovery, and knowledge-graph views over mailbox content.
18
+ - Extracts attachment metadata and text so documents, images, archives, PDFs, and calendar files become searchable context.
19
+ - Serves the cached archive through read-only IMAP for tools that already speak mail protocols.
20
+
21
+ ## Features
22
+
23
+ - Gmail mailbox access through Google Workspace service-account delegation or local user OAuth.
24
+ - REST and MCP tools for search, reads, labels, archive/read/star state, contacts, categories, graph exploration, and archive operations.
25
+ - PostgreSQL catalog for labels, threads, headers, MIME structure, categories, graph triples, jobs, sync state, and pgvector embedding chunks.
26
+ - Live Gmail search hydration: unbounded searches can query Gmail, cache returned messages, and enqueue analysis work.
27
+ - Full local object storage for payload bytes using a BLAKE3 content-addressed store with zstd compression for compressible content.
28
+ - Attachment sidecars with Gmail source metadata, `exiftool` metadata, extracted text, OCR, archive listings, document conversion, and optional vision captions.
29
+ - Semantic search using `semchunk` chunking and an OpenAI-compatible embedding endpoint.
30
+ - Knowledge graph extraction with RDF/RDFS, FOAF, SIOC, schema.org, SKOS, PROV-O, and DOAP alignment.
31
+ - Rustworkx graph projection, paths, ranking, centrality, components, project views, and related-node discovery.
32
+ - Category rules plus learned category suggestions from TF-IDF clustering.
33
+ - Timed maintenance jobs for history sync, priority sync, intelligence workers, derived views, PostgreSQL analyze, and optional sidecar refresh.
34
+ - Token-Oriented Object Notation (TOON) by default for MCP responses, with JSON available on request.
35
+ - Read-only IMAP service backed by cached RFC822 archive objects.
36
+
37
+ ## Install
38
+
39
+ ```bash
40
+ uv sync --extra test
41
+ cp config.toml-example config.toml
42
+ ```
43
+
44
+ Edit `config.toml`. All Gmeow settings live under `[gmeow]` so this file can be shared with other Google proxy/archive applications.
45
+
46
+ ```toml
47
+ [gmeow]
48
+ subject = "user@example.com"
49
+ service_account_file = "data/secrets/service-account.json"
50
+ postgres_dsn = "postgresql://gmeow:change-me@127.0.0.1:5432/gmeow"
51
+ embedding_endpoint = "http://127.0.0.1:8090/v1/embeddings"
52
+
53
+ [gmeow.secrets]
54
+ file = "~/.config/gmeow/secrets.sops.yaml"
55
+ unlock_key = "replace-with-local-unlock-key"
56
+ age_key = "AGE-SECRET-KEY-REPLACE-WITH-LOCAL-SOPS-AGE-IDENTITY"
57
+ ```
58
+
59
+ When `[gmeow.secrets]` is configured, Gmeow decrypts the SOPS YAML file at startup and uses `postgres_dsn`, `service_account_json`, `user_credentials_json`, and `imap_password` from that single encrypted file. Configured credential file paths remain explicit fallback paths for local deployments that do not use SOPS.
60
+
61
+ Generate Google Cloud service-account provisioning commands:
62
+
63
+ ```bash
64
+ uv run gmeow provision-plan YOUR_PROJECT_ID
65
+ ```
66
+
67
+ Authorize the generated service-account OAuth client ID in Google Admin Console for:
68
+
69
+ ```text
70
+ https://www.googleapis.com/auth/gmail.modify
71
+ ```
72
+
73
+ As a local fallback, set `auth_mode = "user_oauth"` under `[gmeow]` and create Gmail-scoped Application Default Credentials (ADC):
74
+
75
+ ```bash
76
+ gcloud auth application-default login --scopes=https://www.googleapis.com/auth/gmail.modify,https://www.googleapis.com/auth/cloud-platform
77
+ ```
78
+
79
+ ## Run
80
+
81
+ ```bash
82
+ uv run gmeow serve --host 127.0.0.1 --port 8765
83
+ ```
84
+
85
+ REST is available under `/api/v1`. MCP is mounted at `/mcp` when the installed MCP SDK provides an ASGI app.
86
+
87
+ For persistent local operation under your user account, see [docs/systemd.md](docs/systemd.md).
88
+
89
+ Useful commands:
90
+
91
+ ```bash
92
+ uv run gmeow status
93
+ uv run gmeow doctor
94
+ uv run gmeow sync
95
+ uv run gmeow sync-history
96
+ uv run gmeow refresh-attachment-sidecars
97
+ uv run gmeow enqueue-intelligence
98
+ uv run gmeow run-intelligence-worker
99
+ uv run gmeow rebuild-intelligence
100
+ uv run gmeow discover-categories --since-hours 48
101
+ uv run gmeow seed-categories
102
+ uv run gmeow recategorize
103
+ uv run gmeow category-stats
104
+ ```
105
+
106
+ `gmeow serve` starts timed maintenance tasks from `[gmeow.maintenance]`. Use `GET /api/v1/maintenance/timed` for scheduler state and `POST /api/v1/maintenance/timed/{task_name}/run` to run one task immediately.
107
+
108
+ ## MCP
109
+
110
+ Most MCP read/search tools return compact TOON by default. Pass `format: "json"` when an agent needs JSON-shaped results.
111
+
112
+ Example local agent configuration:
113
+
114
+ ```json
115
+ {
116
+ "mcpServers": {
117
+ "gmeow": {
118
+ "type": "http",
119
+ "url": "http://127.0.0.1:8765/mcp"
120
+ }
121
+ }
122
+ }
123
+ ```
124
+
125
+ Useful MCP tools include text/semantic/hybrid search, attachment text search, message/thread reads, attachment metadata, contacts/people, category tools, graph search/path/rank/project tools, status/help, history sync, priority sync, and limited mailbox actions.
126
+
127
+ ## Local Data
128
+
129
+ By default, Gmeow keeps catalog data in PostgreSQL and file-based data under the git-ignored `data/` directory:
130
+
131
+ - PostgreSQL stores labels, threads, message headers/metadata, MIME structure, sync state, categories, graph triples, async jobs, and pgvector embedding chunks. Schema changes are versioned through Alembic migrations under `migrations/`.
132
+ - `data/objects/blake3/aa/bb/<digest>[.zst]` stores canonical payload bytes in the BLAKE3 content-addressed store.
133
+ - `data/objects/sidecars/aa/bb/<digest>.json` stores attachment sidecar metadata, source metadata, extracted text, and external enrichment.
134
+ - `data/tantivy/` stores the local lexical search index.
135
+ - `data/secrets/` stores local credential files only when you choose file-based credentials instead of SOPS.
136
+
137
+ ## Archive and IMAP
138
+
139
+ Archive-complete messages require both Gmail full JSON and canonical raw RFC822 bytes. New Gmail hydrations fetch RFC822 automatically. Existing cached messages can be completed in bounded batches:
140
+
141
+ ```bash
142
+ uv run gmeow complete-archive --limit 25
143
+ uv run gmeow archive-status
144
+ uv run gmeow verify-objects
145
+ ```
146
+
147
+ Read-only IMAP is available as a loopback service. It exposes Gmail labels as folders, assigns stable per-folder UIDs, serves RFC822 from the content-addressed store (CAS), and rejects mutating IMAP commands.
148
+
149
+ ```bash
150
+ printf 'choose-a-local-password\n' > data/secrets/imap-password
151
+ uv run gmeow serve-imap --host 127.0.0.1 --port 1143
152
+ ```
153
+
154
+ ## Development
155
+
156
+ ```bash
157
+ uv sync --extra test
158
+ uv run python -m compileall main.py src migrations tests
159
+ uv run pytest
160
+ uv build
161
+ uv run python scripts/public_release_check.py
162
+ ```
163
+
164
+ PostgreSQL-backed integration tests are skipped unless `GMEOW_TEST_POSTGRES_DSN` points at a disposable test database. Tests must not use a production database.
165
+
166
+ Before publishing, run the checklist in `docs/PUBLIC_RELEASE_CHECKLIST.md`.
167
+
168
+ ## License
169
+
170
+ Gmeow is licensed under the MIT License. See `LICENSE`.