marker-pdf-agent 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- marker_pdf_agent-0.1.0/DEVELOPMENT.md +46 -0
- marker_pdf_agent-0.1.0/LICENSE +21 -0
- marker_pdf_agent-0.1.0/MANIFEST.in +13 -0
- marker_pdf_agent-0.1.0/PKG-INFO +196 -0
- marker_pdf_agent-0.1.0/README.md +154 -0
- marker_pdf_agent-0.1.0/marker_pdf_agent/__init__.py +4 -0
- marker_pdf_agent-0.1.0/marker_pdf_agent/assets/file-markdown.svg +13 -0
- marker_pdf_agent-0.1.0/marker_pdf_agent/tray.py +229 -0
- marker_pdf_agent-0.1.0/marker_pdf_agent/worker.py +931 -0
- marker_pdf_agent-0.1.0/marker_pdf_agent.egg-info/PKG-INFO +196 -0
- marker_pdf_agent-0.1.0/marker_pdf_agent.egg-info/SOURCES.txt +17 -0
- marker_pdf_agent-0.1.0/marker_pdf_agent.egg-info/dependency_links.txt +1 -0
- marker_pdf_agent-0.1.0/marker_pdf_agent.egg-info/entry_points.txt +2 -0
- marker_pdf_agent-0.1.0/marker_pdf_agent.egg-info/requires.txt +16 -0
- marker_pdf_agent-0.1.0/marker_pdf_agent.egg-info/top_level.txt +1 -0
- marker_pdf_agent-0.1.0/pyproject.toml +117 -0
- marker_pdf_agent-0.1.0/requirements.txt +3 -0
- marker_pdf_agent-0.1.0/setup.cfg +4 -0
- marker_pdf_agent-0.1.0/tests/test_worker.py +767 -0
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
# Development
|
|
2
|
+
|
|
3
|
+
## Tests
|
|
4
|
+
|
|
5
|
+
Run the deterministic test suite:
|
|
6
|
+
|
|
7
|
+
```sh
|
|
8
|
+
venv/bin/python -m pytest
|
|
9
|
+
```
|
|
10
|
+
|
|
11
|
+
Run the live Ollama routing check explicitly when Ollama and `llama3.1` are installed:
|
|
12
|
+
|
|
13
|
+
```sh
|
|
14
|
+
venv/bin/python -m pytest -m live_ollama
|
|
15
|
+
```
|
|
16
|
+
|
|
17
|
+
## Quality Checks
|
|
18
|
+
|
|
19
|
+
```sh
|
|
20
|
+
venv/bin/python -m black --check marker_pdf_agent tests
|
|
21
|
+
venv/bin/python -m ruff check marker_pdf_agent tests
|
|
22
|
+
venv/bin/python -m flake8 marker_pdf_agent tests
|
|
23
|
+
venv/bin/python -m mypy marker_pdf_agent tests
|
|
24
|
+
```
|
|
25
|
+
|
|
26
|
+
## Publish
|
|
27
|
+
|
|
28
|
+
Build and validate the source and wheel distributions before upload:
|
|
29
|
+
|
|
30
|
+
```sh
|
|
31
|
+
rm -rf dist
|
|
32
|
+
venv/bin/python -m build
|
|
33
|
+
venv/bin/python -m twine check dist/*
|
|
34
|
+
```
|
|
35
|
+
|
|
36
|
+
Upload to TestPyPI first:
|
|
37
|
+
|
|
38
|
+
```sh
|
|
39
|
+
venv/bin/python -m twine upload --repository testpypi dist/*
|
|
40
|
+
```
|
|
41
|
+
|
|
42
|
+
Upload to PyPI after verifying the TestPyPI package:
|
|
43
|
+
|
|
44
|
+
```sh
|
|
45
|
+
venv/bin/python -m twine upload dist/*
|
|
46
|
+
```
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Roberto Rossi
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
include README.md
|
|
2
|
+
include DEVELOPMENT.md
|
|
3
|
+
include LICENSE
|
|
4
|
+
include requirements.txt
|
|
5
|
+
include pyproject.toml
|
|
6
|
+
recursive-include marker_pdf_agent/assets *.svg
|
|
7
|
+
recursive-include tests *.py
|
|
8
|
+
recursive-exclude * __pycache__
|
|
9
|
+
recursive-exclude * *.py[co]
|
|
10
|
+
prune build
|
|
11
|
+
prune dist
|
|
12
|
+
prune .marker-pdf-agent
|
|
13
|
+
prune venv
|
|
@@ -0,0 +1,196 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: marker-pdf-agent
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Document conversion agent that watches folders, converts files with marker-pdf, and routes Markdown outputs.
|
|
5
|
+
Author: Roberto Rossi
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/gwr3n/marker-pdf-agent
|
|
8
|
+
Project-URL: Repository, https://github.com/gwr3n/marker-pdf-agent
|
|
9
|
+
Project-URL: Issues, https://github.com/gwr3n/marker-pdf-agent/issues
|
|
10
|
+
Keywords: documents,markdown,pdf,marker-pdf,ollama,tray
|
|
11
|
+
Classifier: Development Status :: 3 - Alpha
|
|
12
|
+
Classifier: Environment :: Console
|
|
13
|
+
Classifier: Environment :: MacOS X
|
|
14
|
+
Classifier: Intended Audience :: End Users/Desktop
|
|
15
|
+
Classifier: Operating System :: MacOS
|
|
16
|
+
Classifier: Operating System :: Microsoft :: Windows
|
|
17
|
+
Classifier: Operating System :: POSIX :: Linux
|
|
18
|
+
Classifier: Programming Language :: Python :: Implementation :: CPython
|
|
19
|
+
Classifier: Programming Language :: Python :: 3
|
|
20
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
21
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
22
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
23
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
24
|
+
Classifier: Topic :: Office/Business
|
|
25
|
+
Classifier: Topic :: Text Processing :: Markup :: Markdown
|
|
26
|
+
Requires-Python: >=3.10
|
|
27
|
+
Description-Content-Type: text/markdown
|
|
28
|
+
License-File: LICENSE
|
|
29
|
+
Requires-Dist: marker-pdf>=1.8.0
|
|
30
|
+
Provides-Extra: gui
|
|
31
|
+
Requires-Dist: PySide6>=6.7; extra == "gui"
|
|
32
|
+
Requires-Dist: pyobjc-framework-Cocoa>=10; platform_system == "Darwin" and extra == "gui"
|
|
33
|
+
Provides-Extra: dev
|
|
34
|
+
Requires-Dist: black>=25.0.0; extra == "dev"
|
|
35
|
+
Requires-Dist: build>=1.3.0; extra == "dev"
|
|
36
|
+
Requires-Dist: flake8>=7.0.0; extra == "dev"
|
|
37
|
+
Requires-Dist: mypy>=1.0.0; extra == "dev"
|
|
38
|
+
Requires-Dist: pytest>=8.4.0; extra == "dev"
|
|
39
|
+
Requires-Dist: ruff>=0.8.0; extra == "dev"
|
|
40
|
+
Requires-Dist: twine>=6.0.0; extra == "dev"
|
|
41
|
+
Dynamic: license-file
|
|
42
|
+
|
|
43
|
+
# marker-pdf-agent
|
|
44
|
+
|
|
45
|
+
[![CI](https://github.com/gwr3n/marker-pdf-agent/actions/workflows/ci.yml/badge.svg)](https://github.com/gwr3n/marker-pdf-agent/actions/workflows/ci.yml)
|
|
46
|
+
[![PyPI version](https://img.shields.io/pypi/v/marker-pdf-agent.svg)](https://pypi.org/project/marker-pdf-agent/)
|
|
47
|
+
[![Python versions](https://img.shields.io/pypi/pyversions/marker-pdf-agent.svg)](https://pypi.org/project/marker-pdf-agent/)
|
|
48
|
+
[![Last commit](https://img.shields.io/github/last-commit/gwr3n/marker-pdf-agent.svg)](https://github.com/gwr3n/marker-pdf-agent/commits/main)
|
|
49
|
+
[![Downloads](https://static.pepy.tech/badge/marker-pdf-agent)](https://pepy.tech/project/marker-pdf-agent)
|
|
50
|
+
[](https://pypi.org/project/marker-pdf-agent/)
|
|
51
|
+
[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](LICENSE)
|
|
52
|
+
[![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)
|
|
53
|
+
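[![Ruff](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/astral-sh/ruff/main/assets/badge/v2.json)](https://docs.astral.sh/ruff/)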
|
|
54
|
+
[![Checked with mypy](https://www.mypy-lang.org/static/mypy_badge.svg)](https://mypy-lang.org/)
|
|
55
|
+
|
|
56
|
+
A small Python document-conversion agent for turning PDFs and other supported documents into Markdown with `marker-pdf`. It watches one or more managed folders, queues documents moved into each `incoming/` directory, processes them through a single conversion worker, and stores the original plus the converted Markdown or asset zip under `converted/<category>/`.
|
|
57
|
+
|
|
58
|
+
The agent can run as a plain foreground worker, an optional tray/menu-bar app, or a user-level background service. Folder routing is local and deterministic by default, with optional Ollama-assisted category selection when explicitly enabled.
|
|
59
|
+
|
|
60
|
+
## Install
|
|
61
|
+
|
|
62
|
+
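Create a virtual environment named `venv` in the project root if you do not already have one (for example, `python3 -m venv venv`), then install the runtime requirements into it: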
|
|
63
|
+
|
|
64
|
+
```sh
|
|
65
|
+
venv/bin/python -m pip install -r requirements.txt
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
Install the optional desktop GUI dependencies when you want the status-bar app:
|
|
69
|
+
|
|
70
|
+
```sh
|
|
71
|
+
venv/bin/python -m pip install ".[gui]"
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
When installing from a built wheel, put the extra on the wheel filename:
|
|
75
|
+
|
|
76
|
+
```sh
|
|
77
|
+
venv/bin/python -m pip install "dist/marker_pdf_agent-0.1.0-py3-none-any.whl[gui]"
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
`ollama` is optional and is not started or queried unless you explicitly pass `--ollama-model` or, in the status-bar GUI, select a model or choose `Refresh models`. When Ollama routing is not enabled, converted files go to `converted/uncategorized`.
|
|
81
|
+
|
|
82
|
+
## Run
|
|
83
|
+
|
|
84
|
+
From the folder you want the agent to manage:
|
|
85
|
+
|
|
86
|
+
```sh
|
|
87
|
+
venv/bin/python -m marker_pdf_agent.worker run
|
|
88
|
+
```
|
|
89
|
+
|
|
90
|
+
For compatibility, running without the `run` subcommand still starts the foreground worker.
|
|
91
|
+
|
|
92
|
+
## Status-Bar GUI
|
|
93
|
+
|
|
94
|
+
The status-bar GUI is for synchronous foreground runs, not installed daemon/service runs. It uses the same worker manager as the command-line foreground worker, shows a compact `Idle` or `Converting` state plus queue size, and still allows only one `marker-pdf` conversion at a time. On macOS it runs as a menu-bar app rather than showing a Dock icon.
|
|
95
|
+
|
|
96
|
+
Install the optional GUI extra before using this mode:
|
|
97
|
+
|
|
98
|
+
```sh
|
|
99
|
+
venv/bin/python -m pip install ".[gui]"
|
|
100
|
+
```
|
|
101
|
+
|
|
102
|
+
Launch it with either command:
|
|
103
|
+
|
|
104
|
+
```sh
|
|
105
|
+
venv/bin/python -m marker_pdf_agent.worker tray --root /path/to/folder
|
|
106
|
+
venv/bin/python -m marker_pdf_agent.worker run --tray --root /path/to/folder
|
|
107
|
+
```
|
|
108
|
+
|
|
109
|
+
Click the status-bar icon to open the menu. The menu refreshes when opened and shows the worker state, queue length, and monitored folders. It also has controls to open a folder's `incoming/` or `converted/` directory, add or remove monitored folders, and quit the foreground worker cleanly. Detailed progress and routing messages are printed to stdout.
|
|
110
|
+
|
|
111
|
+
Use the `Ollama routing` submenu to choose `Disabled` or one of the installed Ollama models. Choose `Refresh models` to query `ollama list` in the background; the app does not query Ollama just from opening the tray menu. The selected model is persisted with the tray config and applies to all monitored folders.
|
|
112
|
+
|
|
113
|
+
Monitored folders and the selected Ollama model are persisted in `~/.marker-pdf-agent/config.json` by default. Use `--config /path/to/config.json` to choose a different config file. The `--root` folder passed at launch is added to that file automatically, and folders or model settings changed from the GUI update the same file.
|
|
114
|
+
|
|
115
|
+
Multiple monitored folders share one conversion queue and one converter loop. Files from any monitored `incoming/` folder may be queued, but only one `marker-pdf` subprocess runs at a time. Removing a monitored folder stops future scans and drops pending queued jobs for that folder.
|
|
116
|
+
|
|
117
|
+
By default the worker creates and uses these folders:
|
|
118
|
+
|
|
119
|
+
- `incoming/` - move documents here for conversion
|
|
120
|
+
- `.marker-pdf-agent/processing/` - temporary in-progress files
|
|
121
|
+
- `.marker-pdf-agent/failed/` - source files that failed conversion
|
|
122
|
+
- `converted/<category>/` - original documents plus final Markdown or zip artifacts
|
|
123
|
+
|
|
124
|
+
If conversion produces only Markdown, the final artifact is a `.md` file. If marker emits images or other assets, the final artifact is a `.zip` containing the Markdown plus assets. The original document is moved into the same category folder as the converted artifact.
|
|
125
|
+
|
|
126
|
+
If a conversion fails, times out, or is interrupted during shutdown, the source document is moved to `.marker-pdf-agent/failed/`. If the worker starts and finds leftover files in `.marker-pdf-agent/processing/` from a previous interrupted run, it moves them to `.marker-pdf-agent/failed/` so they are visible for manual retry.
|
|
127
|
+
|
|
128
|
+
## Options
|
|
129
|
+
|
|
130
|
+
```sh
|
|
131
|
+
venv/bin/python -m marker_pdf_agent.worker \
|
|
132
|
+
--root /path/to/folder \
|
|
133
|
+
--incoming incoming \
|
|
134
|
+
--converted converted \
|
|
135
|
+
--marker-command marker_single \
|
|
136
|
+
--marker-timeout 1800 \
|
|
137
|
+
--ollama-model llama3.1
|
|
138
|
+
```
|
|
139
|
+
|
|
140
|
+
Useful flags:
|
|
141
|
+
|
|
142
|
+
- `--root`: manage a folder other than the current working directory
|
|
143
|
+
- `--config`: choose the persisted foreground GUI config file, defaults to `~/.marker-pdf-agent/config.json`
|
|
144
|
+
- `--incoming`: choose the watched subfolder
|
|
145
|
+
- `--converted`: choose the output subfolder
|
|
146
|
+
- `--marker-command`: choose the `marker-pdf` executable, defaults to `marker_single`
|
|
147
|
+
- `--marker-timeout`: maximum seconds to allow one conversion before moving it to failed, defaults to 1800
|
|
148
|
+
- `--ollama-model`: enable AI folder routing with a specific installed Ollama model
|
|
149
|
+
- `--no-ollama`: disable AI folder routing; this is the default unless `--ollama-model` is set
|
|
150
|
+
|
|
151
|
+
## Background Service
|
|
152
|
+
|
|
153
|
+
The worker itself stays plain Python. Because `marker-pdf` can be GPU-heavy, the agent uses a user-level singleton lock and is intended to run as one background worker per user. That one worker owns the conversion queue and processes one document at a time.
|
|
154
|
+
|
|
155
|
+
Internally, foreground runs go through a worker manager with a single shared conversion queue. This is important for multi-folder and status-bar UI support: multiple monitored folders may enqueue documents, but only one converter loop drains the queue, so only one `marker-pdf` subprocess should use the GPU at a time.
|
|
156
|
+
|
|
157
|
+
The service CLI detects the current operating system and writes the matching service definition for that platform (on Windows it writes setup instructions rather than a native service definition):
|
|
158
|
+
|
|
159
|
+
- macOS: user LaunchAgent plist in `~/Library/LaunchAgents/`
|
|
160
|
+
- Linux: systemd user unit in `~/.config/systemd/user/`
|
|
161
|
+
- Windows: setup instructions for NSSM or a pywin32 service wrapper in `.marker-pdf-agent/windows-service.md`
|
|
162
|
+
|
|
163
|
+
Install a service for a managed folder:
|
|
164
|
+
|
|
165
|
+
```sh
|
|
166
|
+
venv/bin/python -m marker_pdf_agent.worker install-service --root /path/to/folder
|
|
167
|
+
```
|
|
168
|
+
|
|
169
|
+
Then start it with the command printed by the installer. On macOS this is a `launchctl bootstrap ...` command. On Linux this is a `systemctl --user daemon-reload && systemctl --user enable --now ...` command. Windows needs an additional service host, because Python cannot install a native Windows service without one.
|
|
170
|
+
|
|
171
|
+
Check or remove the service definition:
|
|
172
|
+
|
|
173
|
+
```sh
|
|
174
|
+
venv/bin/python -m marker_pdf_agent.worker status
|
|
175
|
+
venv/bin/python -m marker_pdf_agent.worker uninstall-service
|
|
176
|
+
```
|
|
177
|
+
|
|
178
|
+
The `--service-name` option changes the installed service definition name. It is mainly useful when replacing or testing service definitions; running multiple agent services at once is not recommended, and the runtime lock prevents concurrent worker processes for the same user.
|
|
179
|
+
|
|
180
|
+
```sh
|
|
181
|
+
venv/bin/python -m marker_pdf_agent.worker install-service \
|
|
182
|
+
--service-name marker-pdf-agent \
|
|
183
|
+
--root /path/to/folder
|
|
184
|
+
```
|
|
185
|
+
|
|
186
|
+
Service logs are written under the managed folder in `.marker-pdf-agent/service.log` and `.marker-pdf-agent/service.err.log`.
|
|
187
|
+
|
|
188
|
+
## Development
|
|
189
|
+
|
|
190
|
+
Development, test, quality-check, and publishing notes are in [DEVELOPMENT.md](DEVELOPMENT.md).
|
|
191
|
+
|
|
192
|
+
## License
|
|
193
|
+
|
|
194
|
+
This project is released under the MIT License. See [LICENSE](LICENSE).
|
|
195
|
+
|
|
196
|
+
The tray icon is derived from the MIT-licensed `file-markdown` SVG from SVG Repo: <https://www.svgrepo.com/svg/332064/file-markdown>.
|
|
@@ -0,0 +1,154 @@
|
|
|
1
|
+
# marker-pdf-agent
|
|
2
|
+
|
|
3
|
+
[![CI](https://github.com/gwr3n/marker-pdf-agent/actions/workflows/ci.yml/badge.svg)](https://github.com/gwr3n/marker-pdf-agent/actions/workflows/ci.yml)
|
|
4
|
+
[![PyPI version](https://img.shields.io/pypi/v/marker-pdf-agent.svg)](https://pypi.org/project/marker-pdf-agent/)
|
|
5
|
+
[![Python versions](https://img.shields.io/pypi/pyversions/marker-pdf-agent.svg)](https://pypi.org/project/marker-pdf-agent/)
|
|
6
|
+
[![Last commit](https://img.shields.io/github/last-commit/gwr3n/marker-pdf-agent.svg)](https://github.com/gwr3n/marker-pdf-agent/commits/main)
|
|
7
|
+
[![Downloads](https://static.pepy.tech/badge/marker-pdf-agent)](https://pepy.tech/project/marker-pdf-agent)
|
|
8
|
+
[](https://pypi.org/project/marker-pdf-agent/)
|
|
9
|
+
[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](LICENSE)
|
|
10
|
+
[![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)
|
|
11
|
+
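[![Ruff](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/astral-sh/ruff/main/assets/badge/v2.json)](https://docs.astral.sh/ruff/)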
|
|
12
|
+
[![Checked with mypy](https://www.mypy-lang.org/static/mypy_badge.svg)](https://mypy-lang.org/)
|
|
13
|
+
|
|
14
|
+
A small Python document-conversion agent for turning PDFs and other supported documents into Markdown with `marker-pdf`. It watches one or more managed folders, queues documents moved into each `incoming/` directory, processes them through a single conversion worker, and stores the original plus the converted Markdown or asset zip under `converted/<category>/`.
|
|
15
|
+
|
|
16
|
+
The agent can run as a plain foreground worker, an optional tray/menu-bar app, or a user-level background service. Folder routing is local and deterministic by default, with optional Ollama-assisted category selection when explicitly enabled.
|
|
17
|
+
|
|
18
|
+
## Install
|
|
19
|
+
|
|
20
|
+
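Create a virtual environment named `venv` in the project root if you do not already have one (for example, `python3 -m venv venv`), then install the runtime requirements into it: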
|
|
21
|
+
|
|
22
|
+
```sh
|
|
23
|
+
venv/bin/python -m pip install -r requirements.txt
|
|
24
|
+
```
|
|
25
|
+
|
|
26
|
+
Install the optional desktop GUI dependencies when you want the status-bar app:
|
|
27
|
+
|
|
28
|
+
```sh
|
|
29
|
+
venv/bin/python -m pip install ".[gui]"
|
|
30
|
+
```
|
|
31
|
+
|
|
32
|
+
When installing from a built wheel, put the extra on the wheel filename:
|
|
33
|
+
|
|
34
|
+
```sh
|
|
35
|
+
venv/bin/python -m pip install "dist/marker_pdf_agent-0.1.0-py3-none-any.whl[gui]"
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
`ollama` is optional and is not started or queried unless you explicitly pass `--ollama-model` or, in the status-bar GUI, select a model or choose `Refresh models`. When Ollama routing is not enabled, converted files go to `converted/uncategorized`.
|
|
39
|
+
|
|
40
|
+
## Run
|
|
41
|
+
|
|
42
|
+
From the folder you want the agent to manage:
|
|
43
|
+
|
|
44
|
+
```sh
|
|
45
|
+
venv/bin/python -m marker_pdf_agent.worker run
|
|
46
|
+
```
|
|
47
|
+
|
|
48
|
+
For compatibility, running without the `run` subcommand still starts the foreground worker.
|
|
49
|
+
|
|
50
|
+
## Status-Bar GUI
|
|
51
|
+
|
|
52
|
+
The status-bar GUI is for synchronous foreground runs, not installed daemon/service runs. It uses the same worker manager as the command-line foreground worker, shows a compact `Idle` or `Converting` state plus queue size, and still allows only one `marker-pdf` conversion at a time. On macOS it runs as a menu-bar app rather than showing a Dock icon.
|
|
53
|
+
|
|
54
|
+
Install the optional GUI extra before using this mode:
|
|
55
|
+
|
|
56
|
+
```sh
|
|
57
|
+
venv/bin/python -m pip install ".[gui]"
|
|
58
|
+
```
|
|
59
|
+
|
|
60
|
+
Launch it with either command:
|
|
61
|
+
|
|
62
|
+
```sh
|
|
63
|
+
venv/bin/python -m marker_pdf_agent.worker tray --root /path/to/folder
|
|
64
|
+
venv/bin/python -m marker_pdf_agent.worker run --tray --root /path/to/folder
|
|
65
|
+
```
|
|
66
|
+
|
|
67
|
+
Click the status-bar icon to open the menu. The menu refreshes when opened and shows the worker state, queue length, and monitored folders. It also has controls to open a folder's `incoming/` or `converted/` directory, add or remove monitored folders, and quit the foreground worker cleanly. Detailed progress and routing messages are printed to stdout.
|
|
68
|
+
|
|
69
|
+
Use the `Ollama routing` submenu to choose `Disabled` or one of the installed Ollama models. Choose `Refresh models` to query `ollama list` in the background; the app does not query Ollama just from opening the tray menu. The selected model is persisted with the tray config and applies to all monitored folders.
|
|
70
|
+
|
|
71
|
+
Monitored folders and the selected Ollama model are persisted in `~/.marker-pdf-agent/config.json` by default. Use `--config /path/to/config.json` to choose a different config file. The `--root` folder passed at launch is added to that file automatically, and folders or model settings changed from the GUI update the same file.
|
|
72
|
+
|
|
73
|
+
Multiple monitored folders share one conversion queue and one converter loop. Files from any monitored `incoming/` folder may be queued, but only one `marker-pdf` subprocess runs at a time. Removing a monitored folder stops future scans and drops pending queued jobs for that folder.
|
|
74
|
+
|
|
75
|
+
By default the worker creates and uses these folders:
|
|
76
|
+
|
|
77
|
+
- `incoming/` - move documents here for conversion
|
|
78
|
+
- `.marker-pdf-agent/processing/` - temporary in-progress files
|
|
79
|
+
- `.marker-pdf-agent/failed/` - source files that failed conversion
|
|
80
|
+
- `converted/<category>/` - original documents plus final Markdown or zip artifacts
|
|
81
|
+
|
|
82
|
+
If conversion produces only Markdown, the final artifact is a `.md` file. If marker emits images or other assets, the final artifact is a `.zip` containing the Markdown plus assets. The original document is moved into the same category folder as the converted artifact.
|
|
83
|
+
|
|
84
|
+
If a conversion fails, times out, or is interrupted during shutdown, the source document is moved to `.marker-pdf-agent/failed/`. If the worker starts and finds leftover files in `.marker-pdf-agent/processing/` from a previous interrupted run, it moves them to `.marker-pdf-agent/failed/` so they are visible for manual retry.
|
|
85
|
+
|
|
86
|
+
## Options
|
|
87
|
+
|
|
88
|
+
```sh
|
|
89
|
+
venv/bin/python -m marker_pdf_agent.worker \
|
|
90
|
+
--root /path/to/folder \
|
|
91
|
+
--incoming incoming \
|
|
92
|
+
--converted converted \
|
|
93
|
+
--marker-command marker_single \
|
|
94
|
+
--marker-timeout 1800 \
|
|
95
|
+
--ollama-model llama3.1
|
|
96
|
+
```
|
|
97
|
+
|
|
98
|
+
Useful flags:
|
|
99
|
+
|
|
100
|
+
- `--root`: manage a folder other than the current working directory
|
|
101
|
+
- `--config`: choose the persisted foreground GUI config file, defaults to `~/.marker-pdf-agent/config.json`
|
|
102
|
+
- `--incoming`: choose the watched subfolder
|
|
103
|
+
- `--converted`: choose the output subfolder
|
|
104
|
+
- `--marker-command`: choose the `marker-pdf` executable, defaults to `marker_single`
|
|
105
|
+
- `--marker-timeout`: maximum seconds to allow one conversion before moving it to failed, defaults to 1800
|
|
106
|
+
- `--ollama-model`: enable AI folder routing with a specific installed Ollama model
|
|
107
|
+
- `--no-ollama`: disable AI folder routing; this is the default unless `--ollama-model` is set
|
|
108
|
+
|
|
109
|
+
## Background Service
|
|
110
|
+
|
|
111
|
+
The worker itself stays plain Python. Because `marker-pdf` can be GPU-heavy, the agent uses a user-level singleton lock and is intended to run as one background worker per user. That one worker owns the conversion queue and processes one document at a time.
|
|
112
|
+
|
|
113
|
+
Internally, foreground runs go through a worker manager with a single shared conversion queue. This is important for multi-folder and status-bar UI support: multiple monitored folders may enqueue documents, but only one converter loop drains the queue, so only one `marker-pdf` subprocess should use the GPU at a time.
|
|
114
|
+
|
|
115
|
+
The service CLI detects the current operating system and writes the matching service definition for that platform (on Windows it writes setup instructions rather than a native service definition):
|
|
116
|
+
|
|
117
|
+
- macOS: user LaunchAgent plist in `~/Library/LaunchAgents/`
|
|
118
|
+
- Linux: systemd user unit in `~/.config/systemd/user/`
|
|
119
|
+
- Windows: setup instructions for NSSM or a pywin32 service wrapper in `.marker-pdf-agent/windows-service.md`
|
|
120
|
+
|
|
121
|
+
Install a service for a managed folder:
|
|
122
|
+
|
|
123
|
+
```sh
|
|
124
|
+
venv/bin/python -m marker_pdf_agent.worker install-service --root /path/to/folder
|
|
125
|
+
```
|
|
126
|
+
|
|
127
|
+
Then start it with the command printed by the installer. On macOS this is a `launchctl bootstrap ...` command. On Linux this is a `systemctl --user daemon-reload && systemctl --user enable --now ...` command. Windows needs an additional service host, because Python cannot install a native Windows service without one.
|
|
128
|
+
|
|
129
|
+
Check or remove the service definition:
|
|
130
|
+
|
|
131
|
+
```sh
|
|
132
|
+
venv/bin/python -m marker_pdf_agent.worker status
|
|
133
|
+
venv/bin/python -m marker_pdf_agent.worker uninstall-service
|
|
134
|
+
```
|
|
135
|
+
|
|
136
|
+
The `--service-name` option changes the installed service definition name. It is mainly useful when replacing or testing service definitions; running multiple agent services at once is not recommended, and the runtime lock prevents concurrent worker processes for the same user.
|
|
137
|
+
|
|
138
|
+
```sh
|
|
139
|
+
venv/bin/python -m marker_pdf_agent.worker install-service \
|
|
140
|
+
--service-name marker-pdf-agent \
|
|
141
|
+
--root /path/to/folder
|
|
142
|
+
```
|
|
143
|
+
|
|
144
|
+
Service logs are written under the managed folder in `.marker-pdf-agent/service.log` and `.marker-pdf-agent/service.err.log`.
|
|
145
|
+
|
|
146
|
+
## Development
|
|
147
|
+
|
|
148
|
+
Development, test, quality-check, and publishing notes are in [DEVELOPMENT.md](DEVELOPMENT.md).
|
|
149
|
+
|
|
150
|
+
## License
|
|
151
|
+
|
|
152
|
+
This project is released under the MIT License. See [LICENSE](LICENSE).
|
|
153
|
+
|
|
154
|
+
The tray icon is derived from the MIT-licensed `file-markdown` SVG from SVG Repo: <https://www.svgrepo.com/svg/332064/file-markdown>.
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
|
2
|
+
<svg id="Layer_1" xmlns="http://www.w3.org/2000/svg" version="1.1" viewBox="0 0 550 700">
|
|
3
|
+
<!-- Derived from SVG Repo file-markdown icon, MIT License: https://www.svgrepo.com/svg/332064/file-markdown -->
|
|
4
|
+
<!-- Generator: Adobe Illustrator 30.6.0, SVG Export Plug-In . SVG Version: 2.1.4 Build 109) -->
|
|
5
|
+
<defs>
|
|
6
|
+
<style>
|
|
7
|
+
.st0 {
|
|
8
|
+
fill: #fff;
|
|
9
|
+
}
|
|
10
|
+
</style>
|
|
11
|
+
</defs>
|
|
12
|
+
<path class="st0" d="M542.66,175.55c4.69,4.69,7.34,11.02,7.34,17.66v481.8c0,13.83-11.17,25-25,25H25c-13.83,0-25-11.17-25-25V25C0,11.17,11.17,0,25,0h331.8c6.64,0,13.05,2.66,17.73,7.34l168.13,168.2h0ZM492.34,204.69L345.31,57.66v147.03h147.03ZM207.91,419.48l46.18,103.88c2.01,4.51,6.48,7.42,11.42,7.42h18.8c4.94,0,9.42-2.91,11.43-7.43l46.17-104.18v123.02c0,6.9,5.6,12.5,12.5,12.5h21.37c6.9,0,12.5-5.6,12.5-12.5v-212.5c0-6.9-5.6-12.5-12.5-12.5h-27.15c-4.98,0-9.48,2.95-11.46,7.52l-62.09,142.64-62.09-142.65c-1.99-4.56-6.49-7.51-11.46-7.51h-27.3c-6.9,0-12.5,5.6-12.5,12.5v212.5c0,6.9,5.6,12.5,12.5,12.5h21.2c6.9,0,12.5-5.6,12.5-12.5v-122.71Z"/>
|
|
13
|
+
</svg>
|