diffgrab 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,12 @@
1
+ __pycache__/
2
+ *.py[cod]
3
+ *.egg-info/
4
+ dist/
5
+ build/
6
+ .venv/
7
+ .pytest_cache/
8
+ .ruff_cache/
9
+ .env
10
+ .env.local
11
+ CLAUDE.md
12
+ PLAN.txt
diffgrab-0.1.0/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 QuartzUnit
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,237 @@
1
+ Metadata-Version: 2.4
2
+ Name: diffgrab
3
+ Version: 0.1.0
4
+ Summary: Web page change tracking with structured diffs — markgrab + snapgrab integration, MCP native.
5
+ Project-URL: Homepage, https://github.com/QuartzUnit/diffgrab
6
+ Project-URL: Repository, https://github.com/QuartzUnit/diffgrab
7
+ Author: QuartzUnit
8
+ License-Expression: MIT
9
+ License-File: LICENSE
10
+ Keywords: change-detection,diff,markgrab,mcp,monitoring,web
11
+ Classifier: Development Status :: 3 - Alpha
12
+ Classifier: License :: OSI Approved :: MIT License
13
+ Classifier: Programming Language :: Python :: 3
14
+ Classifier: Programming Language :: Python :: 3.11
15
+ Classifier: Programming Language :: Python :: 3.12
16
+ Classifier: Programming Language :: Python :: 3.13
17
+ Classifier: Typing :: Typed
18
+ Requires-Python: >=3.11
19
+ Requires-Dist: httpx>=0.28
20
+ Requires-Dist: markgrab>=0.1.2
21
+ Provides-Extra: all
22
+ Requires-Dist: click>=8.0; extra == 'all'
23
+ Requires-Dist: fastmcp>=2.0; extra == 'all'
24
+ Requires-Dist: rich>=13.0; extra == 'all'
25
+ Requires-Dist: snapgrab>=0.1.0; extra == 'all'
26
+ Provides-Extra: cli
27
+ Requires-Dist: click>=8.0; extra == 'cli'
28
+ Requires-Dist: rich>=13.0; extra == 'cli'
29
+ Provides-Extra: dev
30
+ Requires-Dist: pytest-asyncio>=0.24; extra == 'dev'
31
+ Requires-Dist: pytest>=8.0; extra == 'dev'
32
+ Requires-Dist: ruff>=0.8; extra == 'dev'
33
+ Provides-Extra: mcp
34
+ Requires-Dist: fastmcp>=2.0; extra == 'mcp'
35
+ Provides-Extra: visual
36
+ Requires-Dist: snapgrab>=0.1.0; extra == 'visual'
37
+ Description-Content-Type: text/markdown
38
+
39
+ # diffgrab
40
+
41
+ [![PyPI](https://img.shields.io/pypi/v/diffgrab)](https://pypi.org/project/diffgrab/)
42
+ [![Python](https://img.shields.io/pypi/pyversions/diffgrab)](https://pypi.org/project/diffgrab/)
43
+ [![License](https://img.shields.io/github/license/QuartzUnit/diffgrab)](https://github.com/QuartzUnit/diffgrab/blob/main/LICENSE)
44
+
45
+ > Web page change tracking with structured diffs. markgrab + snapgrab integration, MCP native.
46
+
47
+ ```python
48
+ from diffgrab import DiffTracker
49
+
50
+ tracker = DiffTracker()
51
+ await tracker.track("https://example.com")
52
+ changes = await tracker.check()
53
+ for c in changes:
54
+ if c.changed:
55
+ print(c.summary) # "3 lines added, 1 lines removed in sections: Introduction."
56
+ print(c.unified_diff) # Standard unified diff output
57
+ await tracker.close()
58
+ ```
59
+
60
+ ## Features
61
+
62
+ - **Change detection** — track any URL, detect content changes via content hashing
63
+ - **Structured diffs** — unified diff + section-level analysis (which headings changed)
64
+ - **Human-readable summaries** — "5 lines added, 2 lines removed in sections: Intro, Methods."
65
+ - **Snapshot history** — SQLite storage, browse past versions of any page
66
+ - **markgrab powered** — HTML/YouTube/PDF/DOCX extraction via [markgrab](https://github.com/QuartzUnit/markgrab)
67
+ - **Visual diff** — optional screenshot comparison via [snapgrab](https://github.com/QuartzUnit/snapgrab)
68
+ - **MCP server** — 5 tools for Claude Code / MCP clients
69
+ - **CLI included** — `diffgrab track`, `check`, `diff`, `history`, `untrack`
70
+
71
+ ## Install
72
+
73
+ ```bash
74
+ pip install diffgrab
75
+ ```
76
+
77
+ Optional extras:
78
+
79
+ ```bash
80
+ pip install 'diffgrab[cli]' # CLI with click + rich
81
+ pip install 'diffgrab[visual]' # Visual diff with snapgrab
82
+ pip install 'diffgrab[mcp]' # MCP server with fastmcp
83
+ pip install 'diffgrab[all]' # Everything
84
+ ```
85
+
86
+ ## Usage
87
+
88
+ ### Python API
89
+
90
+ ```python
91
+ import asyncio
92
+ from diffgrab import DiffTracker
93
+
94
+ async def main():
95
+ tracker = DiffTracker()
96
+
97
+ # Track a URL (takes initial snapshot)
98
+ await tracker.track("https://example.com", interval_hours=12)
99
+
100
+ # Check for changes
101
+ changes = await tracker.check()
102
+ for change in changes:
103
+ if change.changed:
104
+ print(change.summary)
105
+ print(change.unified_diff)
106
+
107
+ # Get diff between specific snapshots
108
+ result = await tracker.diff("https://example.com", before_id=1, after_id=2)
109
+
110
+ # Browse snapshot history
111
+ history = await tracker.history("https://example.com", count=20)
112
+
113
+ # Stop tracking
114
+ await tracker.untrack("https://example.com")
115
+
116
+ await tracker.close()
117
+
118
+ asyncio.run(main())
119
+ ```
120
+
121
+ ### Convenience Functions
122
+
123
+ ```python
124
+ from diffgrab import track, check, diff, history, untrack
125
+
126
+ await track("https://example.com")
127
+ changes = await check()
128
+ result = await diff("https://example.com")
129
+ snaps = await history("https://example.com")
130
+ await untrack("https://example.com")
131
+ ```
132
+
133
+ ### CLI
134
+
135
+ ```bash
136
+ # Track a URL
137
+ diffgrab track https://example.com --interval 12
138
+
139
+ # Check all tracked URLs for changes
140
+ diffgrab check
141
+
142
+ # Check a specific URL
143
+ diffgrab check https://example.com
144
+
145
+ # Show diff between snapshots
146
+ diffgrab diff https://example.com
147
+ diffgrab diff https://example.com --before 1 --after 3
148
+
149
+ # View snapshot history
150
+ diffgrab history https://example.com --count 20
151
+
152
+ # Stop tracking
153
+ diffgrab untrack https://example.com
154
+ ```
155
+
156
+ ### MCP Server
157
+
158
+ Add to your Claude Code MCP config:
159
+
160
+ ```json
161
+ {
162
+ "mcpServers": {
163
+ "diffgrab": {
164
+ "command": "diffgrab-mcp",
165
+ "args": []
166
+ }
167
+ }
168
+ }
169
+ ```
170
+
171
+ Or with uvx:
172
+
173
+ ```json
174
+ {
175
+ "mcpServers": {
176
+ "diffgrab": {
177
+ "command": "uvx",
178
+ "args": ["--from", "diffgrab[mcp]", "diffgrab-mcp"]
179
+ }
180
+ }
181
+ }
182
+ ```
183
+
184
+ **MCP Tools:**
185
+
186
+ | Tool | Description |
187
+ |------|-------------|
188
+ | `track_url` | Register a URL for change tracking |
189
+ | `check_changes` | Check tracked URLs for changes |
190
+ | `get_diff` | Get structured diff between snapshots |
191
+ | `get_history` | Browse snapshot history |
192
+ | `untrack_url` | Stop tracking a URL |
193
+
194
+ ## DiffResult
195
+
196
+ Every diff operation returns a `DiffResult`:
197
+
198
+ ```python
199
+ @dataclass
200
+ class DiffResult:
201
+ url: str # The tracked URL
202
+ changed: bool # Whether content changed
203
+ added_lines: int # Lines added
204
+ removed_lines: int # Lines removed
205
+ changed_sections: list[str] # Markdown headings with changes
206
+ unified_diff: str # Standard unified diff
207
+ summary: str # Human-readable summary
208
+ before_snapshot_id: int | None # DB ID of older snapshot
209
+ after_snapshot_id: int | None # DB ID of newer snapshot
210
+ before_timestamp: str # When older snapshot was taken
211
+ after_timestamp: str # When newer snapshot was taken
212
+ ```
213
+
214
+ ## Storage
215
+
216
+ Snapshots are stored in SQLite at `~/.local/share/diffgrab/diffgrab.db` (auto-created). Custom path:
217
+
218
+ ```python
219
+ tracker = DiffTracker(db_path="/path/to/custom.db")
220
+ ```
221
+
222
+ ## QuartzUnit Ecosystem
223
+
224
+ | Package | Role | PyPI |
225
+ |---------|------|------|
226
+ | [markgrab](https://github.com/QuartzUnit/markgrab) | HTML/YouTube/PDF/DOCX to markdown | `pip install markgrab` |
227
+ | [snapgrab](https://github.com/QuartzUnit/snapgrab) | URL to screenshot + metadata | `pip install snapgrab` |
228
+ | [docpick](https://github.com/QuartzUnit/docpick) | OCR + LLM document extraction | `pip install docpick` |
229
+ | [feedkit](https://github.com/QuartzUnit/feedkit) | RSS feed collection | `pip install feedkit` |
230
+ | **diffgrab** | **Web page change tracking** | `pip install diffgrab` |
231
+ | [browsegrab](https://github.com/QuartzUnit/browsegrab) | Browser agent for LLMs | Coming soon |
232
+
233
+ ## License
234
+
235
+ [MIT](LICENSE)
236
+
237
+ <!-- mcp-name: io.github.ArkNill/diffgrab -->
@@ -0,0 +1,199 @@
1
+ # diffgrab
2
+
3
+ [![PyPI](https://img.shields.io/pypi/v/diffgrab)](https://pypi.org/project/diffgrab/)
4
+ [![Python](https://img.shields.io/pypi/pyversions/diffgrab)](https://pypi.org/project/diffgrab/)
5
+ [![License](https://img.shields.io/github/license/QuartzUnit/diffgrab)](https://github.com/QuartzUnit/diffgrab/blob/main/LICENSE)
6
+
7
+ > Web page change tracking with structured diffs. markgrab + snapgrab integration, MCP native.
8
+
9
+ ```python
10
+ from diffgrab import DiffTracker
11
+
12
+ tracker = DiffTracker()
13
+ await tracker.track("https://example.com")
14
+ changes = await tracker.check()
15
+ for c in changes:
16
+ if c.changed:
17
+ print(c.summary) # "3 lines added, 1 lines removed in sections: Introduction."
18
+ print(c.unified_diff) # Standard unified diff output
19
+ await tracker.close()
20
+ ```
21
+
22
+ ## Features
23
+
24
+ - **Change detection** — track any URL, detect content changes via content hashing
25
+ - **Structured diffs** — unified diff + section-level analysis (which headings changed)
26
+ - **Human-readable summaries** — "5 lines added, 2 lines removed in sections: Intro, Methods."
27
+ - **Snapshot history** — SQLite storage, browse past versions of any page
28
+ - **markgrab powered** — HTML/YouTube/PDF/DOCX extraction via [markgrab](https://github.com/QuartzUnit/markgrab)
29
+ - **Visual diff** — optional screenshot comparison via [snapgrab](https://github.com/QuartzUnit/snapgrab)
30
+ - **MCP server** — 5 tools for Claude Code / MCP clients
31
+ - **CLI included** — `diffgrab track`, `check`, `diff`, `history`, `untrack`
32
+
33
+ ## Install
34
+
35
+ ```bash
36
+ pip install diffgrab
37
+ ```
38
+
39
+ Optional extras:
40
+
41
+ ```bash
42
+ pip install 'diffgrab[cli]' # CLI with click + rich
43
+ pip install 'diffgrab[visual]' # Visual diff with snapgrab
44
+ pip install 'diffgrab[mcp]' # MCP server with fastmcp
45
+ pip install 'diffgrab[all]' # Everything
46
+ ```
47
+
48
+ ## Usage
49
+
50
+ ### Python API
51
+
52
+ ```python
53
+ import asyncio
54
+ from diffgrab import DiffTracker
55
+
56
+ async def main():
57
+ tracker = DiffTracker()
58
+
59
+ # Track a URL (takes initial snapshot)
60
+ await tracker.track("https://example.com", interval_hours=12)
61
+
62
+ # Check for changes
63
+ changes = await tracker.check()
64
+ for change in changes:
65
+ if change.changed:
66
+ print(change.summary)
67
+ print(change.unified_diff)
68
+
69
+ # Get diff between specific snapshots
70
+ result = await tracker.diff("https://example.com", before_id=1, after_id=2)
71
+
72
+ # Browse snapshot history
73
+ history = await tracker.history("https://example.com", count=20)
74
+
75
+ # Stop tracking
76
+ await tracker.untrack("https://example.com")
77
+
78
+ await tracker.close()
79
+
80
+ asyncio.run(main())
81
+ ```
82
+
83
+ ### Convenience Functions
84
+
85
+ ```python
86
+ from diffgrab import track, check, diff, history, untrack
87
+
88
+ await track("https://example.com")
89
+ changes = await check()
90
+ result = await diff("https://example.com")
91
+ snaps = await history("https://example.com")
92
+ await untrack("https://example.com")
93
+ ```
94
+
95
+ ### CLI
96
+
97
+ ```bash
98
+ # Track a URL
99
+ diffgrab track https://example.com --interval 12
100
+
101
+ # Check all tracked URLs for changes
102
+ diffgrab check
103
+
104
+ # Check a specific URL
105
+ diffgrab check https://example.com
106
+
107
+ # Show diff between snapshots
108
+ diffgrab diff https://example.com
109
+ diffgrab diff https://example.com --before 1 --after 3
110
+
111
+ # View snapshot history
112
+ diffgrab history https://example.com --count 20
113
+
114
+ # Stop tracking
115
+ diffgrab untrack https://example.com
116
+ ```
117
+
118
+ ### MCP Server
119
+
120
+ Add to your Claude Code MCP config:
121
+
122
+ ```json
123
+ {
124
+ "mcpServers": {
125
+ "diffgrab": {
126
+ "command": "diffgrab-mcp",
127
+ "args": []
128
+ }
129
+ }
130
+ }
131
+ ```
132
+
133
+ Or with uvx:
134
+
135
+ ```json
136
+ {
137
+ "mcpServers": {
138
+ "diffgrab": {
139
+ "command": "uvx",
140
+ "args": ["--from", "diffgrab[mcp]", "diffgrab-mcp"]
141
+ }
142
+ }
143
+ }
144
+ ```
145
+
146
+ **MCP Tools:**
147
+
148
+ | Tool | Description |
149
+ |------|-------------|
150
+ | `track_url` | Register a URL for change tracking |
151
+ | `check_changes` | Check tracked URLs for changes |
152
+ | `get_diff` | Get structured diff between snapshots |
153
+ | `get_history` | Browse snapshot history |
154
+ | `untrack_url` | Stop tracking a URL |
155
+
156
+ ## DiffResult
157
+
158
+ Every diff operation returns a `DiffResult`:
159
+
160
+ ```python
161
+ @dataclass
162
+ class DiffResult:
163
+ url: str # The tracked URL
164
+ changed: bool # Whether content changed
165
+ added_lines: int # Lines added
166
+ removed_lines: int # Lines removed
167
+ changed_sections: list[str] # Markdown headings with changes
168
+ unified_diff: str # Standard unified diff
169
+ summary: str # Human-readable summary
170
+ before_snapshot_id: int | None # DB ID of older snapshot
171
+ after_snapshot_id: int | None # DB ID of newer snapshot
172
+ before_timestamp: str # When older snapshot was taken
173
+ after_timestamp: str # When newer snapshot was taken
174
+ ```
175
+
176
+ ## Storage
177
+
178
+ Snapshots are stored in SQLite at `~/.local/share/diffgrab/diffgrab.db` (auto-created). Custom path:
179
+
180
+ ```python
181
+ tracker = DiffTracker(db_path="/path/to/custom.db")
182
+ ```
183
+
184
+ ## QuartzUnit Ecosystem
185
+
186
+ | Package | Role | PyPI |
187
+ |---------|------|------|
188
+ | [markgrab](https://github.com/QuartzUnit/markgrab) | HTML/YouTube/PDF/DOCX to markdown | `pip install markgrab` |
189
+ | [snapgrab](https://github.com/QuartzUnit/snapgrab) | URL to screenshot + metadata | `pip install snapgrab` |
190
+ | [docpick](https://github.com/QuartzUnit/docpick) | OCR + LLM document extraction | `pip install docpick` |
191
+ | [feedkit](https://github.com/QuartzUnit/feedkit) | RSS feed collection | `pip install feedkit` |
192
+ | **diffgrab** | **Web page change tracking** | `pip install diffgrab` |
193
+ | [browsegrab](https://github.com/QuartzUnit/browsegrab) | Browser agent for LLMs | Coming soon |
194
+
195
+ ## License
196
+
197
+ [MIT](LICENSE)
198
+
199
+ <!-- mcp-name: io.github.ArkNill/diffgrab -->
@@ -0,0 +1,109 @@
1
+ """diffgrab — Web page change tracking with structured diffs."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from diffgrab.differ import DiffResult
6
+ from diffgrab.tracker import DiffTracker
7
+
8
+ __all__ = ["DiffTracker", "DiffResult", "track", "check", "diff", "history", "untrack"]
9
+ __version__ = "0.1.0"
10
+
11
+
12
async def track(url: str, interval_hours: int = 24, *, db_path: str = "") -> str:
    """Start tracking a URL using a short-lived ``DiffTracker``.

    A tracker is created for this single call, its ``track`` method is
    awaited, and the tracker is closed again whether or not the call
    succeeded.

    Args:
        url: The URL to track.
        interval_hours: Check interval in hours (default: 24).
        db_path: Custom database path. When empty, ``DiffTracker`` is
            constructed without a ``db_path`` argument.

    Returns:
        The status message returned by ``DiffTracker.track``.
    """
    # Only forward db_path when the caller actually supplied one.
    if db_path:
        session = DiffTracker(db_path=db_path)
    else:
        session = DiffTracker()

    try:
        status = await session.track(url, interval_hours)
    finally:
        # Always release the tracker, even if tracking raised.
        await session.close()
    return status
29
+
30
+
31
async def check(url: str | None = None, *, db_path: str = "") -> list[DiffResult]:
    """Check tracked URLs for changes using a short-lived ``DiffTracker``.

    A tracker is created for this single call, its ``check`` method is
    awaited, and the tracker is closed again whether or not the call
    succeeded.

    Args:
        url: Specific URL to check, or None for all.
        db_path: Custom database path. When empty, ``DiffTracker`` is
            constructed without a ``db_path`` argument.

    Returns:
        The list of ``DiffResult`` objects returned by ``DiffTracker.check``.
    """
    # Only forward db_path when the caller actually supplied one.
    if db_path:
        session = DiffTracker(db_path=db_path)
    else:
        session = DiffTracker()

    try:
        results = await session.check(url)
    finally:
        # Always release the tracker, even if the check raised.
        await session.close()
    return results
47
+
48
+
49
async def diff(
    url: str,
    before_id: int | None = None,
    after_id: int | None = None,
    *,
    db_path: str = "",
) -> DiffResult:
    """Diff two snapshots of a URL using a short-lived ``DiffTracker``.

    A tracker is created for this single call, its ``diff`` method is
    awaited, and the tracker is closed again whether or not the call
    succeeded.

    Args:
        url: The URL to diff.
        before_id: Database ID of the older snapshot; passed through
            unchanged (including None) to ``DiffTracker.diff``.
        after_id: Database ID of the newer snapshot; passed through
            unchanged (including None) to ``DiffTracker.diff``.
        db_path: Custom database path. When empty, ``DiffTracker`` is
            constructed without a ``db_path`` argument.

    Returns:
        The ``DiffResult`` returned by ``DiffTracker.diff``.
    """
    # Only forward db_path when the caller actually supplied one.
    if db_path:
        session = DiffTracker(db_path=db_path)
    else:
        session = DiffTracker()

    try:
        outcome = await session.diff(url, before_id, after_id)
    finally:
        # Always release the tracker, even if the diff raised.
        await session.close()
    return outcome
73
+
74
+
75
async def history(url: str, count: int = 10, *, db_path: str = "") -> list[dict]:
    """Fetch snapshot history for a URL using a short-lived ``DiffTracker``.

    A tracker is created for this single call, its ``history`` method is
    awaited, and the tracker is closed again whether or not the call
    succeeded.

    Args:
        url: The URL to get history for.
        count: Maximum number of snapshots (default: 10).
        db_path: Custom database path. When empty, ``DiffTracker`` is
            constructed without a ``db_path`` argument.

    Returns:
        The list of snapshot metadata dicts returned by
        ``DiffTracker.history``.
    """
    # Only forward db_path when the caller actually supplied one.
    if db_path:
        session = DiffTracker(db_path=db_path)
    else:
        session = DiffTracker()

    try:
        snapshots = await session.history(url, count)
    finally:
        # Always release the tracker, even if the lookup raised.
        await session.close()
    return snapshots
92
+
93
+
94
async def untrack(url: str, *, db_path: str = "") -> str:
    """Stop tracking a URL using a short-lived ``DiffTracker``.

    A tracker is created for this single call, its ``untrack`` method is
    awaited, and the tracker is closed again whether or not the call
    succeeded.

    Args:
        url: The URL to untrack.
        db_path: Custom database path. When empty, ``DiffTracker`` is
            constructed without a ``db_path`` argument.

    Returns:
        The status message returned by ``DiffTracker.untrack``.
    """
    # Only forward db_path when the caller actually supplied one.
    if db_path:
        session = DiffTracker(db_path=db_path)
    else:
        session = DiffTracker()

    try:
        status = await session.untrack(url)
    finally:
        # Always release the tracker, even if untracking raised.
        await session.close()
    return status