plocate2 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54) hide show
  1. plocate2-0.1.0/.gitignore +5 -0
  2. plocate2-0.1.0/LICENSE +21 -0
  3. plocate2-0.1.0/PKG-INFO +297 -0
  4. plocate2-0.1.0/README.md +282 -0
  5. plocate2-0.1.0/asset/test/test.db +0 -0
  6. plocate2-0.1.0/development/build.sh +5 -0
  7. plocate2-0.1.0/development/generate.sh +4 -0
  8. plocate2-0.1.0/development/push.sh +5 -0
  9. plocate2-0.1.0/pyproject.toml +39 -0
  10. plocate2-0.1.0/src/plocate/__init__.py +21 -0
  11. plocate2-0.1.0/src/plocate/binary_reader.py +58 -0
  12. plocate2-0.1.0/src/plocate/config.py +53 -0
  13. plocate2-0.1.0/src/plocate/constants.py +3 -0
  14. plocate2-0.1.0/src/plocate/database.py +284 -0
  15. plocate2-0.1.0/src/plocate/directory_data.py +115 -0
  16. plocate2-0.1.0/src/plocate/entrypoint/__init__.py +0 -0
  17. plocate2-0.1.0/src/plocate/entrypoint/export.py +68 -0
  18. plocate2-0.1.0/src/plocate/entrypoint/search.py +136 -0
  19. plocate2-0.1.0/src/plocate/entrypoint/stats.py +63 -0
  20. plocate2-0.1.0/src/plocate/errors.py +8 -0
  21. plocate2-0.1.0/src/plocate/export.py +120 -0
  22. plocate2-0.1.0/src/plocate/filename_index.py +73 -0
  23. plocate2-0.1.0/src/plocate/formatting.py +92 -0
  24. plocate2-0.1.0/src/plocate/header.py +78 -0
  25. plocate2-0.1.0/src/plocate/indexed_entry.py +19 -0
  26. plocate2-0.1.0/src/plocate/indexed_search.py +98 -0
  27. plocate2-0.1.0/src/plocate/patterns.py +166 -0
  28. plocate2-0.1.0/src/plocate/posting_list.py +436 -0
  29. plocate2-0.1.0/src/plocate/search.py +219 -0
  30. plocate2-0.1.0/src/plocate/stats.py +102 -0
  31. plocate2-0.1.0/src/plocate/trigram_index.py +152 -0
  32. plocate2-0.1.0/src/plocate/trigram_patterns.py +163 -0
  33. plocate2-0.1.0/tests/conftest.py +106 -0
  34. plocate2-0.1.0/tests/entrypoint/__init__.py +0 -0
  35. plocate2-0.1.0/tests/entrypoint/test_export.py +95 -0
  36. plocate2-0.1.0/tests/entrypoint/test_search.py +66 -0
  37. plocate2-0.1.0/tests/entrypoint/test_stats.py +44 -0
  38. plocate2-0.1.0/tests/support/__init__.py +0 -0
  39. plocate2-0.1.0/tests/support/fixture_builder.py +137 -0
  40. plocate2-0.1.0/tests/support/index_builder.py +191 -0
  41. plocate2-0.1.0/tests/support/updatedb_fixture.py +8 -0
  42. plocate2-0.1.0/tests/test_binary_reader.py +28 -0
  43. plocate2-0.1.0/tests/test_config.py +26 -0
  44. plocate2-0.1.0/tests/test_database.py +125 -0
  45. plocate2-0.1.0/tests/test_directory_data.py +22 -0
  46. plocate2-0.1.0/tests/test_export.py +89 -0
  47. plocate2-0.1.0/tests/test_filename_index.py +49 -0
  48. plocate2-0.1.0/tests/test_formatting.py +50 -0
  49. plocate2-0.1.0/tests/test_header.py +80 -0
  50. plocate2-0.1.0/tests/test_indexed_search.py +124 -0
  51. plocate2-0.1.0/tests/test_patterns.py +93 -0
  52. plocate2-0.1.0/tests/test_posting_list.py +23 -0
  53. plocate2-0.1.0/tests/test_search.py +36 -0
  54. plocate2-0.1.0/tests/test_stats.py +57 -0
@@ -0,0 +1,5 @@
1
+ /dist/
2
+
3
+ __pycache__/
4
+
5
+ /.python-version
plocate2-0.1.0/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Dustin Oprea
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,297 @@
1
+ Metadata-Version: 2.4
2
+ Name: plocate2
3
+ Version: 0.1.0
4
+ Summary: Python library and tools for reading plocate database files
5
+ Project-URL: Homepage, https://github.com/dsoprea/PyPlocate
6
+ License-File: LICENSE
7
+ Requires-Python: >=3.11
8
+ Requires-Dist: zstandard>=0.22.0
9
+ Provides-Extra: build
10
+ Requires-Dist: build; extra == 'build'
11
+ Requires-Dist: twine; extra == 'build'
12
+ Provides-Extra: dev
13
+ Requires-Dist: pytest>=8.0.0; extra == 'dev'
14
+ Description-Content-Type: text/markdown
15
+
16
+ # plocate
17
+
18
+ [![PyPI version](https://img.shields.io/pypi/v/plocate2)](https://pypi.org/project/plocate2/)
19
+
20
+ ## Overview
21
+
22
+ Python library and command-line tools for reading [plocate](https://plocate.sesse.net) database files (`plocate.db`), the default 'locate' implementation of Arch, Debian, Ubuntu, and other Linux distributions.
23
+
24
+ This can enable an application to have immediate and optimized access to a reasonably up-to-date catalog of the complete filesystem and to *avoid the overhead of manually scanning every file*.
25
+
26
+ This requires not only a Linux system that has *plocate* installed and running, but also that you can wait (or manage the reality of having to wait) for the next plocate update before new files become visible.
27
+
28
+ ## Installation
29
+
30
+ ```bash
31
+ python -m venv .venv
32
+ source .venv/bin/activate
33
+ pip install -e .
34
+ ```
35
+
36
+ ## Usage Via Library
37
+
38
+ Examples use the checked-in fixture at `asset/test/test.db` unless noted otherwise. That file is an `updatedb` snapshot of this repository, so paths in the output below are shown relative to the repository root (`./...`).
39
+
40
+ Search via the trigram index on a healthy database (substring or glob patterns):
41
+
42
+ ```python
43
+ import plocate
44
+
45
+ with plocate.PlocateDatabase.open("asset/test/test.db") as database:
46
+ options = plocate.SearchOptions(force_indexed_search=True)
47
+ for path in plocate.search_database(database, "*.py", options=options):
48
+ print(path)
49
+ ```
50
+
51
+ ```
52
+ ./src/plocate/__init__.py
53
+ ./src/plocate/binary_reader.py
54
+ ./src/plocate/config.py
55
+ ...
56
+ (37 paths)
57
+ ```
58
+
59
+ Search via a full scan when the on-disk index is unreadable (for example, a truncated database):
60
+
61
+ ```python
62
+ import plocate
63
+
64
+ with plocate.PlocateDatabase.open("truncated-plocate.db") as database:
65
+ options = plocate.SearchOptions(force_linear_search=True)
66
+ for path in plocate.search_database(database, "readme", options=options):
67
+ print(path)
68
+ ```
69
+
70
+ ```
71
+ /tmp/example/readme.txt
72
+ ```
73
+
74
+ `search_database` also scans every filename block on healthy databases when the pattern cannot use the index, such as regex searches:
75
+
76
+ ```python
77
+ import plocate
78
+
79
+ options = plocate.SearchOptions(use_regex=True)
80
+ with plocate.PlocateDatabase.open("asset/test/test.db") as database:
81
+ for path in plocate.search_database(database, r"\.py$", options=options):
82
+ print(path)
83
+ ```
84
+
85
+ ```
86
+ ./src/plocate/__init__.py
87
+ ./src/plocate/binary_reader.py
88
+ ./src/plocate/config.py
89
+ ...
90
+ (37 paths)
91
+ ```
92
+
93
+ Export indexed records:
94
+
95
+ ```python
96
+ import plocate
97
+
98
+ options = plocate.ExportOptions(include_pattern="*.py")
99
+ with plocate.PlocateDatabase.open("asset/test/test.db") as database:
100
+ for record in plocate.iter_export_records(database, options=options):
101
+ print(record.to_dict())
102
+ ```
103
+
104
+ ```
105
+ {'path': './src/plocate/__init__.py', 'docid': 0, 'block_index': 21, 'database_version': 1, 'max_version': 2, 'check_visibility': True, 'is_directory': False}
106
+ {'path': './src/plocate/binary_reader.py', 'docid': 0, 'block_index': 22, 'database_version': 1, 'max_version': 2, 'check_visibility': True, 'is_directory': False}
107
+ ...
108
+ (37 records)
109
+ ```
110
+
111
+ Inspect indexed entries with metadata:
112
+
113
+ ```python
114
+ import plocate
115
+
116
+ # Synthetic example database with /tmp/example/... paths.
117
+ with plocate.PlocateDatabase.open("example.db") as database:
118
+ for entry in database.iter_indexed_entries():
119
+ print(entry.path, entry.docid, entry.directory_time)
120
+ ```
121
+
122
+ ```
123
+ /tmp/example/.catalog-repository.yaml 0 None
124
+ /tmp/example/readme.txt 0 None
125
+ /var/log/syslog 0 None
126
+ ```
127
+
128
+ The `plocate.db` format does not store a dedicated "last updated" timestamp. The closest equivalent is the on-disk modification time of the database file itself.
129
+
130
+ ```python
131
+ import plocate
132
+
133
+ with plocate.PlocateDatabase.open("asset/test/test.db") as database:
134
+ modification_time = database.file_mtime()
135
+ print(modification_time)
136
+ ```
137
+
138
+ ```
139
+ 1779677352.9139218
140
+ ```
141
+
142
+ The package also reads configuration blocks, directory timestamp streams, and trigram posting lists from real `plocate.db` files produced by upstream `updatedb` / `plocate-build`.
143
+
144
+ ## Usage Via Commandline
145
+
146
+ The project is primarily meant to be used as a library. The command-line tools (`pl_stats`, `pl_search`, `pl_export`) are provided for testing, diagnosing, and general convenience; they wrap the same APIs documented above.
147
+
148
+ Examples use `asset/test/test.db` unless noted otherwise. Paths are shown relative to the repository root where applicable.
149
+
150
+ Print database statistics:
151
+
152
+ ```bash
153
+ pl_stats /var/lib/plocate/plocate.db
154
+ pl_stats --json asset/test/test.db
155
+ ```
156
+
157
+ ```
158
+ $ pl_stats /var/lib/plocate/plocate.db
159
+ database: /var/lib/plocate/plocate.db
160
+ file size: ...
161
+ indexed paths: ...
162
+ ...
163
+
164
+ $ pl_stats --json asset/test/test.db
165
+ {
166
+ "database_path": "asset/test/test.db",
167
+ "path_count": 104,
168
+ "num_docids": 4,
169
+ "version": 1,
170
+ "max_version": 2,
171
+ ...
172
+ }
173
+ ```
174
+
175
+ Search for paths:
176
+
177
+ ```bash
178
+ pl_search asset/test/test.db pyproject.toml
179
+ pl_search /var/lib/plocate/plocate.db -c '*.py'
180
+ pl_search asset/test/test.db --regex 'pyproject\.toml$'
181
+ pl_search asset/test/test.db -i readme
182
+ pl_search asset/test/test.db -l 10 '*.py'
183
+ ```
184
+
185
+ ```
186
+ $ pl_search asset/test/test.db pyproject.toml
187
+ ./pyproject.toml
188
+
189
+ $ pl_search /var/lib/plocate/plocate.db -c '*.py'
190
+ 12345
191
+
192
+ $ pl_search asset/test/test.db --regex 'pyproject\.toml$'
193
+ ./pyproject.toml
194
+
195
+ $ pl_search asset/test/test.db -i readme
196
+ ./README.md
197
+ ./.pytest_cache/README.md
198
+
199
+ $ pl_search asset/test/test.db -l 10 '*.py'
200
+ ./src/plocate/__init__.py
201
+ ./src/plocate/binary_reader.py
202
+ ./src/plocate/config.py
203
+ ./src/plocate/constants.py
204
+ ./src/plocate/database.py
205
+ ./src/plocate/directory_data.py
206
+ ./src/plocate/errors.py
207
+ ./src/plocate/export.py
208
+ ./src/plocate/filename_index.py
209
+ ./src/plocate/formatting.py
210
+ ```
211
+
212
+ `pl_search` uses the database trigram index on healthy `plocate.db` files. Upstream `updatedb` / `plocate-build` always write the hash table and posting lists, so a complete database normally has an index. Substring and glob searches narrow candidate filename blocks through that hash table, then verify matches in those blocks.
213
+
214
+ If the file is truncated or the header metadata points past EOF, the reader treats the index as absent and falls back to a full scan of every filename block. Results can still be correct, but the search is slower. Use `--scan` to force the full-scan path, or `--indexed` to require trigram-index search. Regex searches (`-r` / `--regex`) always use a full scan, even on healthy databases, because regex patterns cannot use the trigram index.
215
+
216
+ Indexed search on a healthy database (typical case):
217
+
218
+ ```bash
219
+ pl_search /var/lib/plocate/plocate.db '*.py'
220
+ pl_search asset/test/test.db --indexed '*.py'
221
+ ```
222
+
223
+ ```
224
+ $ pl_search asset/test/test.db --indexed '*.py'
225
+ ./src/plocate/__init__.py
226
+ ./src/plocate/binary_reader.py
227
+ ./src/plocate/config.py
228
+ ...
229
+ (37 paths)
230
+ ```
231
+
232
+ Full-scan fallback when the on-disk index is unreadable (for example, a truncated copy of the same file):
233
+
234
+ ```bash
235
+ # Same command; pl_search scans every filename block instead of consulting the hash table.
236
+ pl_search truncated-plocate.db readme
237
+ pl_search asset/test/test.db --scan '*.py'
238
+ ```
239
+
240
+ ```
241
+ $ pl_search truncated-plocate.db readme
242
+ /tmp/example/readme.txt
243
+
244
+ $ pl_search asset/test/test.db --scan '*.py'
245
+ ./src/plocate/__init__.py
246
+ ./src/plocate/binary_reader.py
247
+ ./src/plocate/config.py
248
+ ...
249
+ (37 paths)
250
+ ```
251
+
252
+ Export indexed paths as JSON Lines:
253
+
254
+ ```bash
255
+ pl_export asset/test/test.db
256
+ pl_export asset/test/test.db --include './src/plocate/*'
257
+ pl_export /var/lib/plocate/plocate.db --include '*.py'
258
+ ```
259
+
260
+ ```
261
+ $ pl_export asset/test/test.db | head -n 1
262
+ {"block_index": 0, "check_visibility": true, "database_version": 1, "directory_time_nanoseconds": ..., "directory_time_seconds": ..., "docid": 0, "is_directory": true, "max_version": 2, "path": "./.cursor"}
263
+
264
+ $ pl_export asset/test/test.db --include './src/plocate/*' | head -n 1
265
+ {"block_index": 21, "check_visibility": true, "database_version": 1, "docid": 0, "is_directory": false, "max_version": 2, "path": "./src/plocate/__init__.py"}
266
+
267
+ $ pl_export /var/lib/plocate/plocate.db --include '*.py' | head -n 1
268
+ {"block_index": ..., "path": "/usr/lib/python3.14/...", ...}
269
+ ```
270
+
271
+ Each export row includes the path plus index and header metadata. When the database stores directory timestamps, each row also includes an `is_directory` field: directory rows carry `is_directory: true` together with the `directory_time_*` fields, while regular-file rows carry `is_directory: false` and no timestamp fields, since per-file timestamps are not stored.
272
+
273
+ Example export row:
274
+
275
+ ```json
276
+ {
277
+ "block_index": 1,
278
+ "check_visibility": false,
279
+ "database_version": 1,
280
+ "docid": 0,
281
+ "max_version": 1,
282
+ "path": "/tmp/example/readme.txt"
283
+ }
284
+ ```
285
+
286
+ ## Layout
287
+
288
+ - Library modules: `src/plocate/`
289
+ - CLI entrypoints: `src/plocate/entrypoint/` (`pl_stats`, `pl_search`, `pl_export`)
290
+ - Tests mirror library paths under `tests/`
291
+
292
+ ```bash
293
+ pip install -e ".[dev]"
294
+ pytest
295
+ ```
296
+
297
+ Reading the database requires permission to open the file. On many systems `/var/lib/plocate/plocate.db` is readable only by the `locate` group or via the setgid `plocate` binary.
@@ -0,0 +1,282 @@
1
+ # plocate
2
+
3
+ [![PyPI version](https://img.shields.io/pypi/v/plocate2)](https://pypi.org/project/plocate2/)
4
+
5
+ ## Overview
6
+
7
+ Python library and command-line tools for reading [plocate](https://plocate.sesse.net) database files (`plocate.db`), the default 'locate' implementation of Arch, Debian, Ubuntu, and other Linux distributions.
8
+
9
+ This can enable an application to have immediate and optimized access to a reasonably up-to-date catalog of the complete filesystem and to *avoid the overhead of manually scanning every file*.
10
+
11
+ This requires not only a Linux system that has *plocate* installed and running, but also that you can wait (or manage the reality of having to wait) for the next plocate update before new files become visible.
12
+
13
+ ## Installation
14
+
15
+ ```bash
16
+ python -m venv .venv
17
+ source .venv/bin/activate
18
+ pip install -e .
19
+ ```
20
+
21
+ ## Usage Via Library
22
+
23
+ Examples use the checked-in fixture at `asset/test/test.db` unless noted otherwise. That file is an `updatedb` snapshot of this repository, so paths in the output below are shown relative to the repository root (`./...`).
24
+
25
+ Search via the trigram index on a healthy database (substring or glob patterns):
26
+
27
+ ```python
28
+ import plocate
29
+
30
+ with plocate.PlocateDatabase.open("asset/test/test.db") as database:
31
+ options = plocate.SearchOptions(force_indexed_search=True)
32
+ for path in plocate.search_database(database, "*.py", options=options):
33
+ print(path)
34
+ ```
35
+
36
+ ```
37
+ ./src/plocate/__init__.py
38
+ ./src/plocate/binary_reader.py
39
+ ./src/plocate/config.py
40
+ ...
41
+ (37 paths)
42
+ ```
43
+
44
+ Search via a full scan when the on-disk index is unreadable (for example, a truncated database):
45
+
46
+ ```python
47
+ import plocate
48
+
49
+ with plocate.PlocateDatabase.open("truncated-plocate.db") as database:
50
+ options = plocate.SearchOptions(force_linear_search=True)
51
+ for path in plocate.search_database(database, "readme", options=options):
52
+ print(path)
53
+ ```
54
+
55
+ ```
56
+ /tmp/example/readme.txt
57
+ ```
58
+
59
+ `search_database` also scans every filename block on healthy databases when the pattern cannot use the index, such as regex searches:
60
+
61
+ ```python
62
+ import plocate
63
+
64
+ options = plocate.SearchOptions(use_regex=True)
65
+ with plocate.PlocateDatabase.open("asset/test/test.db") as database:
66
+ for path in plocate.search_database(database, r"\.py$", options=options):
67
+ print(path)
68
+ ```
69
+
70
+ ```
71
+ ./src/plocate/__init__.py
72
+ ./src/plocate/binary_reader.py
73
+ ./src/plocate/config.py
74
+ ...
75
+ (37 paths)
76
+ ```
77
+
78
+ Export indexed records:
79
+
80
+ ```python
81
+ import plocate
82
+
83
+ options = plocate.ExportOptions(include_pattern="*.py")
84
+ with plocate.PlocateDatabase.open("asset/test/test.db") as database:
85
+ for record in plocate.iter_export_records(database, options=options):
86
+ print(record.to_dict())
87
+ ```
88
+
89
+ ```
90
+ {'path': './src/plocate/__init__.py', 'docid': 0, 'block_index': 21, 'database_version': 1, 'max_version': 2, 'check_visibility': True, 'is_directory': False}
91
+ {'path': './src/plocate/binary_reader.py', 'docid': 0, 'block_index': 22, 'database_version': 1, 'max_version': 2, 'check_visibility': True, 'is_directory': False}
92
+ ...
93
+ (37 records)
94
+ ```
95
+
96
+ Inspect indexed entries with metadata:
97
+
98
+ ```python
99
+ import plocate
100
+
101
+ # Synthetic example database with /tmp/example/... paths.
102
+ with plocate.PlocateDatabase.open("example.db") as database:
103
+ for entry in database.iter_indexed_entries():
104
+ print(entry.path, entry.docid, entry.directory_time)
105
+ ```
106
+
107
+ ```
108
+ /tmp/example/.catalog-repository.yaml 0 None
109
+ /tmp/example/readme.txt 0 None
110
+ /var/log/syslog 0 None
111
+ ```
112
+
113
+ The `plocate.db` format does not store a dedicated "last updated" timestamp. The closest equivalent is the on-disk modification time of the database file itself.
114
+
115
+ ```python
116
+ import plocate
117
+
118
+ with plocate.PlocateDatabase.open("asset/test/test.db") as database:
119
+ modification_time = database.file_mtime()
120
+ print(modification_time)
121
+ ```
122
+
123
+ ```
124
+ 1779677352.9139218
125
+ ```
126
+
127
+ The package also reads configuration blocks, directory timestamp streams, and trigram posting lists from real `plocate.db` files produced by upstream `updatedb` / `plocate-build`.
128
+
129
+ ## Usage Via Commandline
130
+
131
+ The project is primarily meant to be used as a library. The command-line tools (`pl_stats`, `pl_search`, `pl_export`) are provided for testing, diagnosing, and general convenience; they wrap the same APIs documented above.
132
+
133
+ Examples use `asset/test/test.db` unless noted otherwise. Paths are shown relative to the repository root where applicable.
134
+
135
+ Print database statistics:
136
+
137
+ ```bash
138
+ pl_stats /var/lib/plocate/plocate.db
139
+ pl_stats --json asset/test/test.db
140
+ ```
141
+
142
+ ```
143
+ $ pl_stats /var/lib/plocate/plocate.db
144
+ database: /var/lib/plocate/plocate.db
145
+ file size: ...
146
+ indexed paths: ...
147
+ ...
148
+
149
+ $ pl_stats --json asset/test/test.db
150
+ {
151
+ "database_path": "asset/test/test.db",
152
+ "path_count": 104,
153
+ "num_docids": 4,
154
+ "version": 1,
155
+ "max_version": 2,
156
+ ...
157
+ }
158
+ ```
159
+
160
+ Search for paths:
161
+
162
+ ```bash
163
+ pl_search asset/test/test.db pyproject.toml
164
+ pl_search /var/lib/plocate/plocate.db -c '*.py'
165
+ pl_search asset/test/test.db --regex 'pyproject\.toml$'
166
+ pl_search asset/test/test.db -i readme
167
+ pl_search asset/test/test.db -l 10 '*.py'
168
+ ```
169
+
170
+ ```
171
+ $ pl_search asset/test/test.db pyproject.toml
172
+ ./pyproject.toml
173
+
174
+ $ pl_search /var/lib/plocate/plocate.db -c '*.py'
175
+ 12345
176
+
177
+ $ pl_search asset/test/test.db --regex 'pyproject\.toml$'
178
+ ./pyproject.toml
179
+
180
+ $ pl_search asset/test/test.db -i readme
181
+ ./README.md
182
+ ./.pytest_cache/README.md
183
+
184
+ $ pl_search asset/test/test.db -l 10 '*.py'
185
+ ./src/plocate/__init__.py
186
+ ./src/plocate/binary_reader.py
187
+ ./src/plocate/config.py
188
+ ./src/plocate/constants.py
189
+ ./src/plocate/database.py
190
+ ./src/plocate/directory_data.py
191
+ ./src/plocate/errors.py
192
+ ./src/plocate/export.py
193
+ ./src/plocate/filename_index.py
194
+ ./src/plocate/formatting.py
195
+ ```
196
+
197
+ `pl_search` uses the database trigram index on healthy `plocate.db` files. Upstream `updatedb` / `plocate-build` always write the hash table and posting lists, so a complete database normally has an index. Substring and glob searches narrow candidate filename blocks through that hash table, then verify matches in those blocks.
198
+
199
+ If the file is truncated or the header metadata points past EOF, the reader treats the index as absent and falls back to a full scan of every filename block. Results can still be correct, but the search is slower. Use `--scan` to force the full-scan path, or `--indexed` to require trigram-index search. Regex searches (`-r` / `--regex`) always use a full scan, even on healthy databases, because regex patterns cannot use the trigram index.
200
+
201
+ Indexed search on a healthy database (typical case):
202
+
203
+ ```bash
204
+ pl_search /var/lib/plocate/plocate.db '*.py'
205
+ pl_search asset/test/test.db --indexed '*.py'
206
+ ```
207
+
208
+ ```
209
+ $ pl_search asset/test/test.db --indexed '*.py'
210
+ ./src/plocate/__init__.py
211
+ ./src/plocate/binary_reader.py
212
+ ./src/plocate/config.py
213
+ ...
214
+ (37 paths)
215
+ ```
216
+
217
+ Full-scan fallback when the on-disk index is unreadable (for example, a truncated copy of the same file):
218
+
219
+ ```bash
220
+ # Same command; pl_search scans every filename block instead of consulting the hash table.
221
+ pl_search truncated-plocate.db readme
222
+ pl_search asset/test/test.db --scan '*.py'
223
+ ```
224
+
225
+ ```
226
+ $ pl_search truncated-plocate.db readme
227
+ /tmp/example/readme.txt
228
+
229
+ $ pl_search asset/test/test.db --scan '*.py'
230
+ ./src/plocate/__init__.py
231
+ ./src/plocate/binary_reader.py
232
+ ./src/plocate/config.py
233
+ ...
234
+ (37 paths)
235
+ ```
236
+
237
+ Export indexed paths as JSON Lines:
238
+
239
+ ```bash
240
+ pl_export asset/test/test.db
241
+ pl_export asset/test/test.db --include './src/plocate/*'
242
+ pl_export /var/lib/plocate/plocate.db --include '*.py'
243
+ ```
244
+
245
+ ```
246
+ $ pl_export asset/test/test.db | head -n 1
247
+ {"block_index": 0, "check_visibility": true, "database_version": 1, "directory_time_nanoseconds": ..., "directory_time_seconds": ..., "docid": 0, "is_directory": true, "max_version": 2, "path": "./.cursor"}
248
+
249
+ $ pl_export asset/test/test.db --include './src/plocate/*' | head -n 1
250
+ {"block_index": 21, "check_visibility": true, "database_version": 1, "docid": 0, "is_directory": false, "max_version": 2, "path": "./src/plocate/__init__.py"}
251
+
252
+ $ pl_export /var/lib/plocate/plocate.db --include '*.py' | head -n 1
253
+ {"block_index": ..., "path": "/usr/lib/python3.14/...", ...}
254
+ ```
255
+
256
+ Each export row includes the path plus index and header metadata. When the database stores directory timestamps, each row also includes an `is_directory` field: directory rows carry `is_directory: true` together with the `directory_time_*` fields, while regular-file rows carry `is_directory: false` and no timestamp fields, since per-file timestamps are not stored.
257
+
258
+ Example export row:
259
+
260
+ ```json
261
+ {
262
+ "block_index": 1,
263
+ "check_visibility": false,
264
+ "database_version": 1,
265
+ "docid": 0,
266
+ "max_version": 1,
267
+ "path": "/tmp/example/readme.txt"
268
+ }
269
+ ```
270
+
271
+ ## Layout
272
+
273
+ - Library modules: `src/plocate/`
274
+ - CLI entrypoints: `src/plocate/entrypoint/` (`pl_stats`, `pl_search`, `pl_export`)
275
+ - Tests mirror library paths under `tests/`
276
+
277
+ ```bash
278
+ pip install -e ".[dev]"
279
+ pytest
280
+ ```
281
+
282
+ Reading the database requires permission to open the file. On many systems `/var/lib/plocate/plocate.db` is readable only by the `locate` group or via the setgid `plocate` binary.
Binary file
@@ -0,0 +1,5 @@
1
+ #!/bin/bash -ex
2
+
3
+ cd "$(dirname "$0")/.."
4
+
5
+ python3 -m build
@@ -0,0 +1,4 @@
1
+ #!/bin/bash -ex
2
+
3
+ updatedb -o test.db --database-root ./
4
+
@@ -0,0 +1,5 @@
1
+ #!/bin/bash -ex
2
+
3
+ cd "$(dirname "$0")/.."
4
+
5
+ python3 -m twine upload dist/*
@@ -0,0 +1,39 @@
1
+ [build-system]
2
+ requires = ["hatchling"]
3
+ build-backend = "hatchling.build"
4
+
5
+ [project]
6
+ name = "plocate2"
7
+ version = "0.1.0"
8
+ description = "Python library and tools for reading plocate database files"
9
+ readme = "README.md"
10
+ requires-python = ">=3.11"
11
+ dependencies = [
12
+ "zstandard>=0.22.0",
13
+ ]
14
+
15
+ [project.urls]
16
+ Homepage = "https://github.com/dsoprea/PyPlocate"
17
+
18
+ [project.optional-dependencies]
19
+ dev = [
20
+ "pytest>=8.0.0",
21
+ ]
22
+ build = [
23
+ "build",
24
+ "twine",
25
+ ]
26
+
27
+ [tool.pytest.ini_options]
28
+ testpaths = ["tests"]
29
+
30
+ [project.scripts]
31
+ pl_stats = "plocate.entrypoint.stats:main"
32
+ pl_search = "plocate.entrypoint.search:main"
33
+ pl_export = "plocate.entrypoint.export:main"
34
+
35
+ [tool.hatch.build.targets.wheel]
36
+ packages = ["src/plocate"]
37
+
38
+ [tool.hatch.build.targets.wheel.sources]
39
+ "src" = ""
@@ -0,0 +1,21 @@
1
+ """Read and search plocate.db index files."""
2
+
3
+ import plocate.database
4
+ import plocate.export
5
+ import plocate.search
6
+
7
+
8
+ PlocateDatabase = plocate.database.PlocateDatabase
9
+ ExportOptions = plocate.export.ExportOptions
10
+ SearchOptions = plocate.search.SearchOptions
11
+
12
+ iter_export_records = plocate.export.iter_export_records
13
+ search_database = plocate.search.search_database
14
+
15
+ __all__ = [
16
+ "ExportOptions",
17
+ "PlocateDatabase",
18
+ "SearchOptions",
19
+ "iter_export_records",
20
+ "search_database",
21
+ ]