diffgrab 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- diffgrab-0.1.0/.gitignore +12 -0
- diffgrab-0.1.0/LICENSE +21 -0
- diffgrab-0.1.0/PKG-INFO +237 -0
- diffgrab-0.1.0/README.md +199 -0
- diffgrab-0.1.0/diffgrab/__init__.py +109 -0
- diffgrab-0.1.0/diffgrab/__main__.py +132 -0
- diffgrab-0.1.0/diffgrab/db.py +167 -0
- diffgrab-0.1.0/diffgrab/differ.py +179 -0
- diffgrab-0.1.0/diffgrab/mcp_server.py +138 -0
- diffgrab-0.1.0/diffgrab/tracker.py +241 -0
- diffgrab-0.1.0/diffgrab/visual.py +181 -0
- diffgrab-0.1.0/pyproject.toml +50 -0
- diffgrab-0.1.0/tests/__init__.py +0 -0
- diffgrab-0.1.0/tests/test_db.py +203 -0
- diffgrab-0.1.0/tests/test_differ.py +251 -0
- diffgrab-0.1.0/tests/test_tracker.py +266 -0
- diffgrab-0.1.0/tests/test_visual.py +210 -0
diffgrab-0.1.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 QuartzUnit
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
diffgrab-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,237 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: diffgrab
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Web page change tracking with structured diffs — markgrab + snapgrab integration, MCP native.
|
|
5
|
+
Project-URL: Homepage, https://github.com/QuartzUnit/diffgrab
|
|
6
|
+
Project-URL: Repository, https://github.com/QuartzUnit/diffgrab
|
|
7
|
+
Author: QuartzUnit
|
|
8
|
+
License-Expression: MIT
|
|
9
|
+
License-File: LICENSE
|
|
10
|
+
Keywords: change-detection,diff,markgrab,mcp,monitoring,web
|
|
11
|
+
Classifier: Development Status :: 3 - Alpha
|
|
12
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
13
|
+
Classifier: Programming Language :: Python :: 3
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
17
|
+
Classifier: Typing :: Typed
|
|
18
|
+
Requires-Python: >=3.11
|
|
19
|
+
Requires-Dist: httpx>=0.28
|
|
20
|
+
Requires-Dist: markgrab>=0.1.2
|
|
21
|
+
Provides-Extra: all
|
|
22
|
+
Requires-Dist: click>=8.0; extra == 'all'
|
|
23
|
+
Requires-Dist: fastmcp>=2.0; extra == 'all'
|
|
24
|
+
Requires-Dist: rich>=13.0; extra == 'all'
|
|
25
|
+
Requires-Dist: snapgrab>=0.1.0; extra == 'all'
|
|
26
|
+
Provides-Extra: cli
|
|
27
|
+
Requires-Dist: click>=8.0; extra == 'cli'
|
|
28
|
+
Requires-Dist: rich>=13.0; extra == 'cli'
|
|
29
|
+
Provides-Extra: dev
|
|
30
|
+
Requires-Dist: pytest-asyncio>=0.24; extra == 'dev'
|
|
31
|
+
Requires-Dist: pytest>=8.0; extra == 'dev'
|
|
32
|
+
Requires-Dist: ruff>=0.8; extra == 'dev'
|
|
33
|
+
Provides-Extra: mcp
|
|
34
|
+
Requires-Dist: fastmcp>=2.0; extra == 'mcp'
|
|
35
|
+
Provides-Extra: visual
|
|
36
|
+
Requires-Dist: snapgrab>=0.1.0; extra == 'visual'
|
|
37
|
+
Description-Content-Type: text/markdown
|
|
38
|
+
|
|
39
|
+
# diffgrab
|
|
40
|
+
|
|
41
|
+
[](https://pypi.org/project/diffgrab/)
|
|
42
|
+
[](https://pypi.org/project/diffgrab/)
|
|
43
|
+
[](https://github.com/QuartzUnit/diffgrab/blob/main/LICENSE)
|
|
44
|
+
|
|
45
|
+
> Web page change tracking with structured diffs. markgrab + snapgrab integration, MCP native.
|
|
46
|
+
|
|
47
|
+
```python
|
|
48
|
+
from diffgrab import DiffTracker
|
|
49
|
+
|
|
50
|
+
tracker = DiffTracker()
|
|
51
|
+
await tracker.track("https://example.com")
|
|
52
|
+
changes = await tracker.check()
|
|
53
|
+
for c in changes:
|
|
54
|
+
if c.changed:
|
|
55
|
+
print(c.summary) # "3 lines added, 1 lines removed in sections: Introduction."
|
|
56
|
+
print(c.unified_diff) # Standard unified diff output
|
|
57
|
+
await tracker.close()
|
|
58
|
+
```
|
|
59
|
+
|
|
60
|
+
## Features
|
|
61
|
+
|
|
62
|
+
- **Change detection** — track any URL, detect content changes via content hashing
|
|
63
|
+
- **Structured diffs** — unified diff + section-level analysis (which headings changed)
|
|
64
|
+
- **Human-readable summaries** — "5 lines added, 2 removed in sections: Intro, Methods"
|
|
65
|
+
- **Snapshot history** — SQLite storage, browse past versions of any page
|
|
66
|
+
- **markgrab powered** — HTML/YouTube/PDF/DOCX extraction via [markgrab](https://github.com/QuartzUnit/markgrab)
|
|
67
|
+
- **Visual diff** — optional screenshot comparison via [snapgrab](https://github.com/QuartzUnit/snapgrab)
|
|
68
|
+
- **MCP server** — 5 tools for Claude Code / MCP clients
|
|
69
|
+
- **CLI included** — `diffgrab track`, `check`, `diff`, `history`, `untrack`
|
|
70
|
+
|
|
71
|
+
## Install
|
|
72
|
+
|
|
73
|
+
```bash
|
|
74
|
+
pip install diffgrab
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
Optional extras:
|
|
78
|
+
|
|
79
|
+
```bash
|
|
80
|
+
pip install 'diffgrab[cli]' # CLI with click + rich
|
|
81
|
+
pip install 'diffgrab[visual]' # Visual diff with snapgrab
|
|
82
|
+
pip install 'diffgrab[mcp]' # MCP server with fastmcp
|
|
83
|
+
pip install 'diffgrab[all]' # Everything
|
|
84
|
+
```
|
|
85
|
+
|
|
86
|
+
## Usage
|
|
87
|
+
|
|
88
|
+
### Python API
|
|
89
|
+
|
|
90
|
+
```python
|
|
91
|
+
import asyncio
|
|
92
|
+
from diffgrab import DiffTracker
|
|
93
|
+
|
|
94
|
+
async def main():
|
|
95
|
+
tracker = DiffTracker()
|
|
96
|
+
|
|
97
|
+
# Track a URL (takes initial snapshot)
|
|
98
|
+
await tracker.track("https://example.com", interval_hours=12)
|
|
99
|
+
|
|
100
|
+
# Check for changes
|
|
101
|
+
changes = await tracker.check()
|
|
102
|
+
for change in changes:
|
|
103
|
+
if change.changed:
|
|
104
|
+
print(change.summary)
|
|
105
|
+
print(change.unified_diff)
|
|
106
|
+
|
|
107
|
+
# Get diff between specific snapshots
|
|
108
|
+
result = await tracker.diff("https://example.com", before_id=1, after_id=2)
|
|
109
|
+
|
|
110
|
+
# Browse snapshot history
|
|
111
|
+
history = await tracker.history("https://example.com", count=20)
|
|
112
|
+
|
|
113
|
+
# Stop tracking
|
|
114
|
+
await tracker.untrack("https://example.com")
|
|
115
|
+
|
|
116
|
+
await tracker.close()
|
|
117
|
+
|
|
118
|
+
asyncio.run(main())
|
|
119
|
+
```
|
|
120
|
+
|
|
121
|
+
### Convenience Functions
|
|
122
|
+
|
|
123
|
+
```python
|
|
124
|
+
from diffgrab import track, check, diff, history, untrack
|
|
125
|
+
|
|
126
|
+
await track("https://example.com")
|
|
127
|
+
changes = await check()
|
|
128
|
+
result = await diff("https://example.com")
|
|
129
|
+
snaps = await history("https://example.com")
|
|
130
|
+
await untrack("https://example.com")
|
|
131
|
+
```
|
|
132
|
+
|
|
133
|
+
### CLI
|
|
134
|
+
|
|
135
|
+
```bash
|
|
136
|
+
# Track a URL
|
|
137
|
+
diffgrab track https://example.com --interval 12
|
|
138
|
+
|
|
139
|
+
# Check all tracked URLs for changes
|
|
140
|
+
diffgrab check
|
|
141
|
+
|
|
142
|
+
# Check a specific URL
|
|
143
|
+
diffgrab check https://example.com
|
|
144
|
+
|
|
145
|
+
# Show diff between snapshots
|
|
146
|
+
diffgrab diff https://example.com
|
|
147
|
+
diffgrab diff https://example.com --before 1 --after 3
|
|
148
|
+
|
|
149
|
+
# View snapshot history
|
|
150
|
+
diffgrab history https://example.com --count 20
|
|
151
|
+
|
|
152
|
+
# Stop tracking
|
|
153
|
+
diffgrab untrack https://example.com
|
|
154
|
+
```
|
|
155
|
+
|
|
156
|
+
### MCP Server
|
|
157
|
+
|
|
158
|
+
Add to your Claude Code MCP config:
|
|
159
|
+
|
|
160
|
+
```json
|
|
161
|
+
{
|
|
162
|
+
"mcpServers": {
|
|
163
|
+
"diffgrab": {
|
|
164
|
+
"command": "diffgrab-mcp",
|
|
165
|
+
"args": []
|
|
166
|
+
}
|
|
167
|
+
}
|
|
168
|
+
}
|
|
169
|
+
```
|
|
170
|
+
|
|
171
|
+
Or with uvx:
|
|
172
|
+
|
|
173
|
+
```json
|
|
174
|
+
{
|
|
175
|
+
"mcpServers": {
|
|
176
|
+
"diffgrab": {
|
|
177
|
+
"command": "uvx",
|
|
178
|
+
"args": ["--from", "diffgrab[mcp]", "diffgrab-mcp"]
|
|
179
|
+
}
|
|
180
|
+
}
|
|
181
|
+
}
|
|
182
|
+
```
|
|
183
|
+
|
|
184
|
+
**MCP Tools:**
|
|
185
|
+
|
|
186
|
+
| Tool | Description |
|
|
187
|
+
|------|-------------|
|
|
188
|
+
| `track_url` | Register a URL for change tracking |
|
|
189
|
+
| `check_changes` | Check tracked URLs for changes |
|
|
190
|
+
| `get_diff` | Get structured diff between snapshots |
|
|
191
|
+
| `get_history` | Browse snapshot history |
|
|
192
|
+
| `untrack_url` | Stop tracking a URL |
|
|
193
|
+
|
|
194
|
+
## DiffResult
|
|
195
|
+
|
|
196
|
+
Every diff operation returns a `DiffResult`:
|
|
197
|
+
|
|
198
|
+
```python
|
|
199
|
+
@dataclass
|
|
200
|
+
class DiffResult:
|
|
201
|
+
url: str # The tracked URL
|
|
202
|
+
changed: bool # Whether content changed
|
|
203
|
+
added_lines: int # Lines added
|
|
204
|
+
removed_lines: int # Lines removed
|
|
205
|
+
changed_sections: list[str] # Markdown headings with changes
|
|
206
|
+
unified_diff: str # Standard unified diff
|
|
207
|
+
summary: str # Human-readable summary
|
|
208
|
+
before_snapshot_id: int | None # DB ID of older snapshot
|
|
209
|
+
after_snapshot_id: int | None # DB ID of newer snapshot
|
|
210
|
+
before_timestamp: str # When older snapshot was taken
|
|
211
|
+
after_timestamp: str # When newer snapshot was taken
|
|
212
|
+
```
|
|
213
|
+
|
|
214
|
+
## Storage
|
|
215
|
+
|
|
216
|
+
Snapshots are stored in SQLite at `~/.local/share/diffgrab/diffgrab.db` (auto-created). Custom path:
|
|
217
|
+
|
|
218
|
+
```python
|
|
219
|
+
tracker = DiffTracker(db_path="/path/to/custom.db")
|
|
220
|
+
```
|
|
221
|
+
|
|
222
|
+
## QuartzUnit Ecosystem
|
|
223
|
+
|
|
224
|
+
| Package | Role | PyPI |
|
|
225
|
+
|---------|------|------|
|
|
226
|
+
| [markgrab](https://github.com/QuartzUnit/markgrab) | HTML/YouTube/PDF/DOCX to markdown | `pip install markgrab` |
|
|
227
|
+
| [snapgrab](https://github.com/QuartzUnit/snapgrab) | URL to screenshot + metadata | `pip install snapgrab` |
|
|
228
|
+
| [docpick](https://github.com/QuartzUnit/docpick) | OCR + LLM document extraction | `pip install docpick` |
|
|
229
|
+
| [feedkit](https://github.com/QuartzUnit/feedkit) | RSS feed collection | `pip install feedkit` |
|
|
230
|
+
| **diffgrab** | **Web page change tracking** | `pip install diffgrab` |
|
|
231
|
+
| [browsegrab](https://github.com/QuartzUnit/browsegrab) | Browser agent for LLMs | Coming soon |
|
|
232
|
+
|
|
233
|
+
## License
|
|
234
|
+
|
|
235
|
+
[MIT](LICENSE)
|
|
236
|
+
|
|
237
|
+
<!-- mcp-name: io.github.ArkNill/diffgrab -->
|
diffgrab-0.1.0/README.md
ADDED
|
@@ -0,0 +1,199 @@
|
|
|
1
|
+
# diffgrab
|
|
2
|
+
|
|
3
|
+
[![PyPI version](https://img.shields.io/pypi/v/diffgrab)](https://pypi.org/project/diffgrab/)
|
|
4
|
+
[![Python versions](https://img.shields.io/pypi/pyversions/diffgrab)](https://pypi.org/project/diffgrab/)
|
|
5
|
+
[![License: MIT](https://img.shields.io/pypi/l/diffgrab)](https://github.com/QuartzUnit/diffgrab/blob/main/LICENSE)
|
|
6
|
+
|
|
7
|
+
> Web page change tracking with structured diffs. markgrab + snapgrab integration, MCP native.
|
|
8
|
+
|
|
9
|
+
```python
|
|
10
|
+
from diffgrab import DiffTracker
|
|
11
|
+
|
|
12
|
+
tracker = DiffTracker()
|
|
13
|
+
await tracker.track("https://example.com")
|
|
14
|
+
changes = await tracker.check()
|
|
15
|
+
for c in changes:
|
|
16
|
+
if c.changed:
|
|
17
|
+
print(c.summary) # "3 lines added, 1 lines removed in sections: Introduction."
|
|
18
|
+
print(c.unified_diff) # Standard unified diff output
|
|
19
|
+
await tracker.close()
|
|
20
|
+
```
|
|
21
|
+
|
|
22
|
+
## Features
|
|
23
|
+
|
|
24
|
+
- **Change detection** — track any URL, detect content changes via content hashing
|
|
25
|
+
- **Structured diffs** — unified diff + section-level analysis (which headings changed)
|
|
26
|
+
- **Human-readable summaries** — "5 lines added, 2 removed in sections: Intro, Methods"
|
|
27
|
+
- **Snapshot history** — SQLite storage, browse past versions of any page
|
|
28
|
+
- **markgrab powered** — HTML/YouTube/PDF/DOCX extraction via [markgrab](https://github.com/QuartzUnit/markgrab)
|
|
29
|
+
- **Visual diff** — optional screenshot comparison via [snapgrab](https://github.com/QuartzUnit/snapgrab)
|
|
30
|
+
- **MCP server** — 5 tools for Claude Code / MCP clients
|
|
31
|
+
- **CLI included** — `diffgrab track`, `check`, `diff`, `history`, `untrack`
|
|
32
|
+
|
|
33
|
+
## Install
|
|
34
|
+
|
|
35
|
+
```bash
|
|
36
|
+
pip install diffgrab
|
|
37
|
+
```
|
|
38
|
+
|
|
39
|
+
Optional extras:
|
|
40
|
+
|
|
41
|
+
```bash
|
|
42
|
+
pip install 'diffgrab[cli]' # CLI with click + rich
|
|
43
|
+
pip install 'diffgrab[visual]' # Visual diff with snapgrab
|
|
44
|
+
pip install 'diffgrab[mcp]' # MCP server with fastmcp
|
|
45
|
+
pip install 'diffgrab[all]' # Everything
|
|
46
|
+
```
|
|
47
|
+
|
|
48
|
+
## Usage
|
|
49
|
+
|
|
50
|
+
### Python API
|
|
51
|
+
|
|
52
|
+
```python
|
|
53
|
+
import asyncio
|
|
54
|
+
from diffgrab import DiffTracker
|
|
55
|
+
|
|
56
|
+
async def main():
|
|
57
|
+
tracker = DiffTracker()
|
|
58
|
+
|
|
59
|
+
# Track a URL (takes initial snapshot)
|
|
60
|
+
await tracker.track("https://example.com", interval_hours=12)
|
|
61
|
+
|
|
62
|
+
# Check for changes
|
|
63
|
+
changes = await tracker.check()
|
|
64
|
+
for change in changes:
|
|
65
|
+
if change.changed:
|
|
66
|
+
print(change.summary)
|
|
67
|
+
print(change.unified_diff)
|
|
68
|
+
|
|
69
|
+
# Get diff between specific snapshots
|
|
70
|
+
result = await tracker.diff("https://example.com", before_id=1, after_id=2)
|
|
71
|
+
|
|
72
|
+
# Browse snapshot history
|
|
73
|
+
history = await tracker.history("https://example.com", count=20)
|
|
74
|
+
|
|
75
|
+
# Stop tracking
|
|
76
|
+
await tracker.untrack("https://example.com")
|
|
77
|
+
|
|
78
|
+
await tracker.close()
|
|
79
|
+
|
|
80
|
+
asyncio.run(main())
|
|
81
|
+
```
|
|
82
|
+
|
|
83
|
+
### Convenience Functions
|
|
84
|
+
|
|
85
|
+
```python
|
|
86
|
+
from diffgrab import track, check, diff, history, untrack
|
|
87
|
+
|
|
88
|
+
await track("https://example.com")
|
|
89
|
+
changes = await check()
|
|
90
|
+
result = await diff("https://example.com")
|
|
91
|
+
snaps = await history("https://example.com")
|
|
92
|
+
await untrack("https://example.com")
|
|
93
|
+
```
|
|
94
|
+
|
|
95
|
+
### CLI
|
|
96
|
+
|
|
97
|
+
```bash
|
|
98
|
+
# Track a URL
|
|
99
|
+
diffgrab track https://example.com --interval 12
|
|
100
|
+
|
|
101
|
+
# Check all tracked URLs for changes
|
|
102
|
+
diffgrab check
|
|
103
|
+
|
|
104
|
+
# Check a specific URL
|
|
105
|
+
diffgrab check https://example.com
|
|
106
|
+
|
|
107
|
+
# Show diff between snapshots
|
|
108
|
+
diffgrab diff https://example.com
|
|
109
|
+
diffgrab diff https://example.com --before 1 --after 3
|
|
110
|
+
|
|
111
|
+
# View snapshot history
|
|
112
|
+
diffgrab history https://example.com --count 20
|
|
113
|
+
|
|
114
|
+
# Stop tracking
|
|
115
|
+
diffgrab untrack https://example.com
|
|
116
|
+
```
|
|
117
|
+
|
|
118
|
+
### MCP Server
|
|
119
|
+
|
|
120
|
+
Add to your Claude Code MCP config:
|
|
121
|
+
|
|
122
|
+
```json
|
|
123
|
+
{
|
|
124
|
+
"mcpServers": {
|
|
125
|
+
"diffgrab": {
|
|
126
|
+
"command": "diffgrab-mcp",
|
|
127
|
+
"args": []
|
|
128
|
+
}
|
|
129
|
+
}
|
|
130
|
+
}
|
|
131
|
+
```
|
|
132
|
+
|
|
133
|
+
Or with uvx:
|
|
134
|
+
|
|
135
|
+
```json
|
|
136
|
+
{
|
|
137
|
+
"mcpServers": {
|
|
138
|
+
"diffgrab": {
|
|
139
|
+
"command": "uvx",
|
|
140
|
+
"args": ["--from", "diffgrab[mcp]", "diffgrab-mcp"]
|
|
141
|
+
}
|
|
142
|
+
}
|
|
143
|
+
}
|
|
144
|
+
```
|
|
145
|
+
|
|
146
|
+
**MCP Tools:**
|
|
147
|
+
|
|
148
|
+
| Tool | Description |
|
|
149
|
+
|------|-------------|
|
|
150
|
+
| `track_url` | Register a URL for change tracking |
|
|
151
|
+
| `check_changes` | Check tracked URLs for changes |
|
|
152
|
+
| `get_diff` | Get structured diff between snapshots |
|
|
153
|
+
| `get_history` | Browse snapshot history |
|
|
154
|
+
| `untrack_url` | Stop tracking a URL |
|
|
155
|
+
|
|
156
|
+
## DiffResult
|
|
157
|
+
|
|
158
|
+
Every diff operation returns a `DiffResult`:
|
|
159
|
+
|
|
160
|
+
```python
|
|
161
|
+
@dataclass
|
|
162
|
+
class DiffResult:
|
|
163
|
+
url: str # The tracked URL
|
|
164
|
+
changed: bool # Whether content changed
|
|
165
|
+
added_lines: int # Lines added
|
|
166
|
+
removed_lines: int # Lines removed
|
|
167
|
+
changed_sections: list[str] # Markdown headings with changes
|
|
168
|
+
unified_diff: str # Standard unified diff
|
|
169
|
+
summary: str # Human-readable summary
|
|
170
|
+
before_snapshot_id: int | None # DB ID of older snapshot
|
|
171
|
+
after_snapshot_id: int | None # DB ID of newer snapshot
|
|
172
|
+
before_timestamp: str # When older snapshot was taken
|
|
173
|
+
after_timestamp: str # When newer snapshot was taken
|
|
174
|
+
```
|
|
175
|
+
|
|
176
|
+
## Storage
|
|
177
|
+
|
|
178
|
+
Snapshots are stored in SQLite at `~/.local/share/diffgrab/diffgrab.db` (auto-created). Custom path:
|
|
179
|
+
|
|
180
|
+
```python
|
|
181
|
+
tracker = DiffTracker(db_path="/path/to/custom.db")
|
|
182
|
+
```
|
|
183
|
+
|
|
184
|
+
## QuartzUnit Ecosystem
|
|
185
|
+
|
|
186
|
+
| Package | Role | PyPI |
|
|
187
|
+
|---------|------|------|
|
|
188
|
+
| [markgrab](https://github.com/QuartzUnit/markgrab) | HTML/YouTube/PDF/DOCX to markdown | `pip install markgrab` |
|
|
189
|
+
| [snapgrab](https://github.com/QuartzUnit/snapgrab) | URL to screenshot + metadata | `pip install snapgrab` |
|
|
190
|
+
| [docpick](https://github.com/QuartzUnit/docpick) | OCR + LLM document extraction | `pip install docpick` |
|
|
191
|
+
| [feedkit](https://github.com/QuartzUnit/feedkit) | RSS feed collection | `pip install feedkit` |
|
|
192
|
+
| **diffgrab** | **Web page change tracking** | `pip install diffgrab` |
|
|
193
|
+
| [browsegrab](https://github.com/QuartzUnit/browsegrab) | Browser agent for LLMs | Coming soon |
|
|
194
|
+
|
|
195
|
+
## License
|
|
196
|
+
|
|
197
|
+
[MIT](LICENSE)
|
|
198
|
+
|
|
199
|
+
<!-- mcp-name: io.github.ArkNill/diffgrab -->
|
diffgrab-0.1.0/diffgrab/__init__.py
ADDED
|
@@ -0,0 +1,109 @@
|
|
|
1
|
+
"""diffgrab — Web page change tracking with structured diffs."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from diffgrab.differ import DiffResult
|
|
6
|
+
from diffgrab.tracker import DiffTracker
|
|
7
|
+
|
|
8
|
+
__all__ = ["DiffTracker", "DiffResult", "track", "check", "diff", "history", "untrack"]
|
|
9
|
+
__version__ = "0.1.0"
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
async def track(url: str, interval_hours: int = 24, *, db_path: str = "") -> str:
    """Start tracking *url* through a short-lived ``DiffTracker``.

    A tracker is created for this single call, ``DiffTracker.track`` is
    awaited on it, and the tracker is closed again before returning, even
    when the call raises.

    Args:
        url: The URL to track.
        interval_hours: Check interval in hours (default: 24).
        db_path: Custom database path. An empty string (the default)
            constructs the tracker without a ``db_path`` argument.

    Returns:
        The status message produced by ``DiffTracker.track``.
    """
    # Forward db_path only when the caller supplied one; otherwise the
    # tracker is built with no arguments at all.
    if db_path:
        session = DiffTracker(db_path=db_path)
    else:
        session = DiffTracker()

    try:
        status = await session.track(url, interval_hours)
    finally:
        # Always release the tracker, whether or not tracking succeeded.
        await session.close()
    return status
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
async def check(url: str | None = None, *, db_path: str = "") -> list[DiffResult]:
    """Check tracked URLs for changes through a short-lived ``DiffTracker``.

    A tracker is created for this single call, ``DiffTracker.check`` is
    awaited on it, and the tracker is closed again before returning, even
    when the call raises.

    Args:
        url: Specific URL to check, or ``None`` to check all tracked URLs.
        db_path: Custom database path. An empty string (the default)
            constructs the tracker without a ``db_path`` argument.

    Returns:
        The list of ``DiffResult`` objects produced by ``DiffTracker.check``.
    """
    # Forward db_path only when the caller supplied one; otherwise the
    # tracker is built with no arguments at all.
    if db_path:
        session = DiffTracker(db_path=db_path)
    else:
        session = DiffTracker()

    try:
        results = await session.check(url)
    finally:
        # Always release the tracker, whether or not the check succeeded.
        await session.close()
    return results
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
async def diff(
    url: str,
    before_id: int | None = None,
    after_id: int | None = None,
    *,
    db_path: str = "",
) -> DiffResult:
    """Diff two snapshots of *url* through a short-lived ``DiffTracker``.

    A tracker is created for this single call, ``DiffTracker.diff`` is
    awaited on it, and the tracker is closed again before returning, even
    when the call raises.

    Args:
        url: The URL to diff.
        before_id: Database ID of the older snapshot, passed through to
            ``DiffTracker.diff`` unchanged (``None`` by default).
        after_id: Database ID of the newer snapshot, passed through to
            ``DiffTracker.diff`` unchanged (``None`` by default).
        db_path: Custom database path. An empty string (the default)
            constructs the tracker without a ``db_path`` argument.

    Returns:
        The ``DiffResult`` produced by ``DiffTracker.diff``.
    """
    # Forward db_path only when the caller supplied one; otherwise the
    # tracker is built with no arguments at all.
    if db_path:
        session = DiffTracker(db_path=db_path)
    else:
        session = DiffTracker()

    try:
        outcome = await session.diff(url, before_id, after_id)
    finally:
        # Always release the tracker, whether or not the diff succeeded.
        await session.close()
    return outcome
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
async def history(url: str, count: int = 10, *, db_path: str = "") -> list[dict]:
    """Fetch snapshot history for *url* through a short-lived ``DiffTracker``.

    A tracker is created for this single call, ``DiffTracker.history`` is
    awaited on it, and the tracker is closed again before returning, even
    when the call raises.

    Args:
        url: The URL to get history for.
        count: Maximum number of snapshots (default: 10).
        db_path: Custom database path. An empty string (the default)
            constructs the tracker without a ``db_path`` argument.

    Returns:
        The list of snapshot metadata dicts produced by
        ``DiffTracker.history``.
    """
    # Forward db_path only when the caller supplied one; otherwise the
    # tracker is built with no arguments at all.
    if db_path:
        session = DiffTracker(db_path=db_path)
    else:
        session = DiffTracker()

    try:
        snapshots = await session.history(url, count)
    finally:
        # Always release the tracker, whether or not the lookup succeeded.
        await session.close()
    return snapshots
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
async def untrack(url: str, *, db_path: str = "") -> str:
    """Stop tracking *url* through a short-lived ``DiffTracker``.

    A tracker is created for this single call, ``DiffTracker.untrack`` is
    awaited on it, and the tracker is closed again before returning, even
    when the call raises.

    Args:
        url: The URL to untrack.
        db_path: Custom database path. An empty string (the default)
            constructs the tracker without a ``db_path`` argument.

    Returns:
        The status message produced by ``DiffTracker.untrack``.
    """
    # Forward db_path only when the caller supplied one; otherwise the
    # tracker is built with no arguments at all.
    if db_path:
        session = DiffTracker(db_path=db_path)
    else:
        session = DiffTracker()

    try:
        status = await session.untrack(url)
    finally:
        # Always release the tracker, whether or not untracking succeeded.
        await session.close()
    return status
|