nfscache 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,29 @@
1
+ # Python
2
+ __pycache__/
3
+ *.py[cod]
4
+ *.egg-info/
5
+ build/
6
+ dist/
7
+
8
+ # Virtual environment
9
+ .venv/
10
+
11
+ # uv
12
+ .uv/
13
+
14
+ # Testing / coverage
15
+ .pytest_cache/
16
+ .coverage
17
+ htmlcov/
18
+ __cache__/
19
+
20
+ # Editor / OS / local data and tooling artifacts
21
+ .idea/
22
+ .vscode/
23
+ .DS_Store
24
+ *.parquet
25
+ *.env
26
+ *.log
27
+ *.txt
28
+ .claude/
29
+
@@ -0,0 +1,11 @@
1
+ # Lightweight-ish Oracle for dev/testing (Oracle Database Free)
2
+ FROM gvenzl/oracle-free:23-slim
3
+
4
+ # Init scripts: executed once on first DB initialization, in alphabetical order
5
+ # SQL scripts run as SYS; to create objects in an application schema, CONNECT as that user inside the script.
6
+ COPY ./init/ /container-entrypoint-initdb.d/
7
+
8
+ EXPOSE 1521
9
+
10
+ # Image ships with healthcheck.sh; probe it every 10s with a 5s timeout and allow up to 20 failed probes
11
+ HEALTHCHECK --interval=10s --timeout=5s --retries=20 CMD ["healthcheck.sh"]
nfscache-0.1.0/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Mannetroll
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,284 @@
1
+ Metadata-Version: 2.4
2
+ Name: nfscache
3
+ Version: 0.1.0
4
+ Summary: Concurrency-safe, NFS-friendly Parquet cache for Polars DataFrames
5
+ Author: Torbjörn Sjögren
6
+ License-Expression: MIT
7
+ License-File: LICENSE
8
+ Keywords: cache,concurrency,dataframe,nfs,oracle,parquet,polars
9
+ Classifier: Intended Audience :: Developers
10
+ Classifier: License :: OSI Approved :: MIT License
11
+ Classifier: Operating System :: POSIX
12
+ Classifier: Programming Language :: Python :: 3.13
13
+ Classifier: Topic :: Database
14
+ Classifier: Topic :: System :: Filesystems
15
+ Classifier: Typing :: Typed
16
+ Requires-Python: <3.14,>=3.13
17
+ Requires-Dist: numpy<3,>=2.4.6
18
+ Requires-Dist: oracledb<4,>=3.4.2
19
+ Requires-Dist: polars<2,>=1.41.2
20
+ Requires-Dist: pyarrow<25,>=24.0.0
21
+ Description-Content-Type: text/markdown
22
+
23
+ # nfscache
24
+
25
+ Prototype NFS-backed cache for `DataContainer` objects whose payload is a
26
+ Polars `DataFrame`.
27
+
28
+ The cache stores container data as Parquet on a shared filesystem. Cold loads
29
+ can read from any slow source, for example Oracle, MySQL, or a local parquet
30
+ file. Warm loads use `polars.read_parquet`.
31
+
32
+ ## Install
33
+
34
+ ```bash
35
+ pip install nfscache
36
+ ```
37
+
38
+ ## Usage
39
+
40
+ Create an `NFSCache` pointed at a directory on the shared filesystem, then wrap
41
+ your cold-load function with a decorator. The wrapped function only runs on a
42
+ cache miss; warm hits are served from the Parquet cache.
43
+
44
+ ```python
45
+ from pathlib import Path
46
+
47
+ import polars as pl
48
+
49
+ from nfscache.nfs_cache import NFSCache
50
+ from nfscache.data.data_container import DataContainer
51
+
52
+ nfscache = NFSCache(Path("__cache__/nfs"))
53
+
54
+
55
+ # File / in-process source: cache key and version come from `filename`.
56
+ @nfscache.parquet
57
+ def load(filename: Path) -> DataContainer:
58
+ df = pl.read_parquet(filename)
59
+ return DataContainer({"headers": tuple(df.columns), "data": df})
60
+
61
+
62
+ container = load(Path("parquet/A_TEST.parquet")) # cold: runs the body
63
+ container = load(Path("parquet/A_TEST.parquet")) # warm: served from cache
64
+ df = container.data.rows_data_pl
65
+ ```
66
+
67
+ For SQL sources, set `nfscache.connect_factory` (a `Callable[[], connection]`)
68
+ and use `@nfscache.sql`; the first argument is the SQL string. The cache key is
69
+ derived from the normalized SQL, and the source version is derived from
70
+ `MAX(ORA_ROWSCN)` plus the row count. See `nfscache/database/oracle_read.py`
71
+ for a complete Oracle wiring example.
72
+
73
+ ## Current Functionality
74
+
75
+ - Decorator API: `@nfscache.parquet` and `@nfscache.sql`.
76
+ - Stores `DataContainer.data.rows_data_pl` as a Parquet cache file.
77
+ - Reads cached objects with the fast Polars parquet reader.
78
+ - Writes cached objects with `pyarrow.parquet.ParquetWriter`.
79
+ - Writes through unique `*.part` files, then atomically replaces the final file
80
+ with `os.replace`.
81
+ - Cleans up partial cache files on write failure.
82
+ - Uses a per-cache-key mkdir-based read/write lock: warm readers create
83
+ per-reader tokens and can overlap, while writers and invalidations block new
84
+ readers and wait for active readers to finish.
85
+ - Lock tokens include `lock.json` metadata with hostname, PID, UUID,
86
+ `created_at`, and `last_seen`; held locks heartbeat `last_seen`, and stale
87
+ reader/writer tokens are broken after `stale_lock_seconds`.
88
+ - The default stale lock timeout is 30 minutes, sized for cold Oracle reads that
89
+ can take around 10 minutes and keep heartbeating for as long as they run.
90
+ - Adds an authoritative metadata sidecar:
91
+
92
+ ```text
93
+ __cache__/nfs/parquet/A_TEST_1048576.parquet
94
+ __cache__/nfs/parquet/A_TEST_1048576.parquet.meta.json
95
+ ```
96
+
97
+ - Metadata includes source key/version, parquet byte size, parquet SHA-256, row
98
+ count, column count, schema hash, writer version, created time, and normalized
99
+ `source_sql` for SQL-backed entries.
100
+ - Readers reject missing, stale, unsupported, or corrupt metadata and validate
101
+ parquet size/checksum/row count/schema hash before returning a warm hit.
102
+ - Invalidates stale cache entries when the source version changes.
103
+ - For file path arguments, the default source version is a SHA-256 content hash.
104
+ - SQL sources use normalized SQL for cache keys and `COUNT(*)` plus
105
+ `MAX(ORA_ROWSCN)` as the Oracle version token for the detected `FROM` table.
106
+ - Cold loads read the source version both before and after loading and retry if
107
+ the source changes during the read.
108
+
109
+ ## Demo
110
+
111
+ Run:
112
+
113
+ ```bash
114
+ uv run --no-cache --no-sync python -m nfscache.util.main
115
+ ```
116
+
117
+ `main.py` runs:
118
+
119
+ 1. clear `__cache__`
120
+ 2. generate parquet source data
121
+ 3. cold load and write cache
122
+ 4. warm cache load
123
+ 5. regenerate parquet source data
124
+ 6. reload because the source hash changed
125
+ 7. warm cache load again
126
+
127
+ Expected shape:
128
+
129
+ ```text
130
+ Clearing cache: __cache__
131
+ Generating: parquet/A_TEST_1048576.parquet...
132
+ Reading: parquet/A_TEST_1048576.parquet...
133
+ Returning cached object: parquet/A_TEST_1048576.parquet sha=<first 40 chars>...
134
+ Generating: parquet/A_TEST_1048576.parquet...
135
+ Ignoring cache entry: parquet/A_TEST_1048576.parquet: stale source version
136
+ Reading: parquet/A_TEST_1048576.parquet...
137
+ Returning cached object: parquet/A_TEST_1048576.parquet sha=<first 40 chars>...
138
+ ```
139
+
140
+ ## Swarm Test
141
+
142
+ `swarm_file.py` tests a multi-client environment with process-level concurrency.
143
+ It mixes cache gets with source regeneration to simulate clients reading while
144
+ the source data changes.
145
+
146
+ Run the default swarm:
147
+
148
+ ```bash
149
+ uv run --no-cache --no-sync python -m nfscache.util.swarm_file
150
+ ```
151
+
152
+ Default behavior:
153
+
154
+ - 4 client processes
155
+ - 12 get waves
156
+ - 6 source regenerations
157
+ - generations are injected throughout the get waves
158
+ - final warm check after all waves complete
159
+
160
+ Useful smaller run:
161
+
162
+ ```bash
163
+ uv run --no-cache --no-sync python -m nfscache.util.swarm_file \
164
+ --clients 3 \
165
+ --generators 1 \
166
+ --gets-per-client 6 \
167
+ --generations 3 \
168
+ --n-rows 1024 \
169
+ --cols 6 \
170
+ --n-int-cols 2 \
171
+ --n-str-cols 1 \
172
+ --data-dir /tmp/parquet-nfs-wave-swarm-parquet \
173
+ --cache-dir /tmp/parquet-nfs-wave-swarm-cache
174
+ ```
175
+
176
+ Swarm output includes:
177
+
178
+ - source generation hash
179
+ - cold `Reading: ...` reloads after invalidation
180
+ - warm `Returning cached object: ... sha=...` hits
181
+ - final multi-client warm check
182
+
183
+ ## SQL Swarm Test
184
+
185
+ `swarm_sql.py` tests the same process-level concurrency path for Oracle-backed
186
+ SQL reads. It creates an Oracle table, runs client reads through `@nfscache.sql`,
187
+ and rewrites the table between read waves so the cache has to invalidate and
188
+ reload under load.
189
+
190
+ Start Oracle first, then run:
191
+
192
+ ```bash
193
+ uv run --no-cache --no-sync python -m nfscache.util.swarm_sql
194
+ ```
195
+
196
+ Useful smaller run:
197
+
198
+ ```bash
199
+ uv run --no-cache --no-sync python -m nfscache.util.swarm_sql \
200
+ --clients 2 \
201
+ --writers 1 \
202
+ --gets-per-client 3 \
203
+ --generations 2 \
204
+ --n-rows 128 \
205
+ --batch-size 64 \
206
+ --table SWARM_SQL_TEST \
207
+ --cache-dir /tmp/parquet-nfs-swarm-sql-cache
208
+ ```
209
+
210
+ SQL swarm output includes Oracle cold reads, writer SCNs, stale SQL cache
211
+ invalidation, warm cache hits, and a final multi-client warm check.
212
+
213
+ ## Tests
214
+
215
+ Run focused unit tests:
216
+
217
+ ```bash
218
+ uv run --no-cache --no-sync python -m unittest discover -s tests
219
+ ```
220
+
221
+ The tests cover authoritative metadata, corrupted metadata/parquet recovery,
222
+ normalized SQL metadata, overlapping warm readers, and writer-preference
223
+ locking. A syntax check for all modules:
224
+
225
+ ```bash
226
+ uv run --no-cache --no-sync python -m compileall -q nfscache tests
227
+ ```
228
+
229
+ ## Generate Parquets
230
+
231
+ Generate or replace test parquet files:
232
+
233
+ ```bash
234
+ uv run --no-cache --no-sync python -m nfscache.util.generate_parquets
235
+ ```
236
+
237
+ The generator writes to a unique `*.part` file and atomically replaces the final
238
+ parquet when the write is complete.
239
+
240
+ By default, content changes on every run. Use `--seed` for reproducible data:
241
+
242
+ ```bash
243
+ uv run --no-cache --no-sync python -m nfscache.util.generate_parquets --seed 123
244
+ ```
245
+
246
+ ## Oracle SQL Cache
247
+
248
+ Start the local Oracle demo container:
249
+
250
+ ```bash
251
+ ./build_and_run.sh [--wipe]
252
+ ```
253
+
254
+ Populate the demo table:
255
+
256
+ ```bash
257
+ uv run --no-cache --no-sync python -m nfscache.database.oracle_write_container
258
+ ```
259
+
260
+ Read through the SQL cache:
261
+
262
+ ```bash
263
+ uv run --no-cache --no-sync python -m nfscache.database.oracle_read "select * from DATA_CONTAINER_DEMO"
264
+ ```
265
+
266
+ SQL cache keys use normalized SQL plus requested columns. Metadata stores the
267
+ normalized `source_sql`, and source versions use `COUNT(*)` plus
268
+ `MAX(ORA_ROWSCN)` for the detected `FROM` table.
269
+
270
+ ## Production Notes
271
+
272
+ This is not yet production-grade enterprise software.
273
+
274
+ For Oracle on NFS with many clients, the next important pieces are:
275
+
276
+ - validate `mkdir` lock tokens, writer intent, stale-lock recovery, and
277
+ `os.replace` semantics on the actual NFS mount
278
+ - tie long Oracle reads to a documented consistent SCN/snapshot strategy
279
+ - add structured logs and metrics for hit/miss/reload, reader/writer lock wait,
280
+ cold load duration, parquet write/read duration, and corruption/retry counts
281
+ - broaden automated failure tests for crashed lock holders, corrupted files,
282
+ source changes during cold load, and multi-host NFS integration
283
+ - add operational controls for cache retention, quotas, old `*.part` cleanup,
284
+ version migration, compression, permissions, and bad-key runbooks
@@ -0,0 +1,262 @@
1
+ # nfscache
2
+
3
+ Prototype NFS-backed cache for `DataContainer` objects whose payload is a
4
+ Polars `DataFrame`.
5
+
6
+ The cache stores container data as Parquet on a shared filesystem. Cold loads
7
+ can read from any slow source, for example Oracle, MySQL, or a local parquet
8
+ file. Warm loads use `polars.read_parquet`.
9
+
10
+ ## Install
11
+
12
+ ```bash
13
+ pip install nfscache
14
+ ```
15
+
16
+ ## Usage
17
+
18
+ Create an `NFSCache` pointed at a directory on the shared filesystem, then wrap
19
+ your cold-load function with a decorator. The wrapped function only runs on a
20
+ cache miss; warm hits are served from the Parquet cache.
21
+
22
+ ```python
23
+ from pathlib import Path
24
+
25
+ import polars as pl
26
+
27
+ from nfscache.nfs_cache import NFSCache
28
+ from nfscache.data.data_container import DataContainer
29
+
30
+ nfscache = NFSCache(Path("__cache__/nfs"))
31
+
32
+
33
+ # File / in-process source: cache key and version come from `filename`.
34
+ @nfscache.parquet
35
+ def load(filename: Path) -> DataContainer:
36
+ df = pl.read_parquet(filename)
37
+ return DataContainer({"headers": tuple(df.columns), "data": df})
38
+
39
+
40
+ container = load(Path("parquet/A_TEST.parquet")) # cold: runs the body
41
+ container = load(Path("parquet/A_TEST.parquet")) # warm: served from cache
42
+ df = container.data.rows_data_pl
43
+ ```
44
+
45
+ For SQL sources, set `nfscache.connect_factory` (a `Callable[[], connection]`)
46
+ and use `@nfscache.sql`; the first argument is the SQL string. The cache key is
47
+ derived from the normalized SQL, and the source version is derived from
48
+ `MAX(ORA_ROWSCN)` plus the row count. See `nfscache/database/oracle_read.py`
49
+ for a complete Oracle wiring example.
50
+
51
+ ## Current Functionality
52
+
53
+ - Decorator API: `@nfscache.parquet` and `@nfscache.sql`.
54
+ - Stores `DataContainer.data.rows_data_pl` as a Parquet cache file.
55
+ - Reads cached objects with the fast Polars parquet reader.
56
+ - Writes cached objects with `pyarrow.parquet.ParquetWriter`.
57
+ - Writes through unique `*.part` files, then atomically replaces the final file
58
+ with `os.replace`.
59
+ - Cleans up partial cache files on write failure.
60
+ - Uses a per-cache-key mkdir-based read/write lock: warm readers create
61
+ per-reader tokens and can overlap, while writers and invalidations block new
62
+ readers and wait for active readers to finish.
63
+ - Lock tokens include `lock.json` metadata with hostname, PID, UUID,
64
+ `created_at`, and `last_seen`; held locks heartbeat `last_seen`, and stale
65
+ reader/writer tokens are broken after `stale_lock_seconds`.
66
+ - The default stale lock timeout is 30 minutes, sized for cold Oracle reads that
67
+ can take around 10 minutes and keep heartbeating for as long as they run.
68
+ - Adds an authoritative metadata sidecar:
69
+
70
+ ```text
71
+ __cache__/nfs/parquet/A_TEST_1048576.parquet
72
+ __cache__/nfs/parquet/A_TEST_1048576.parquet.meta.json
73
+ ```
74
+
75
+ - Metadata includes source key/version, parquet byte size, parquet SHA-256, row
76
+ count, column count, schema hash, writer version, created time, and normalized
77
+ `source_sql` for SQL-backed entries.
78
+ - Readers reject missing, stale, unsupported, or corrupt metadata and validate
79
+ parquet size/checksum/row count/schema hash before returning a warm hit.
80
+ - Invalidates stale cache entries when the source version changes.
81
+ - For file path arguments, the default source version is a SHA-256 content hash.
82
+ - SQL sources use normalized SQL for cache keys and `COUNT(*)` plus
83
+ `MAX(ORA_ROWSCN)` as the Oracle version token for the detected `FROM` table.
84
+ - Cold loads read the source version both before and after loading and retry if
85
+ the source changes during the read.
86
+
87
+ ## Demo
88
+
89
+ Run:
90
+
91
+ ```bash
92
+ uv run --no-cache --no-sync python -m nfscache.util.main
93
+ ```
94
+
95
+ `main.py` runs:
96
+
97
+ 1. clear `__cache__`
98
+ 2. generate parquet source data
99
+ 3. cold load and write cache
100
+ 4. warm cache load
101
+ 5. regenerate parquet source data
102
+ 6. reload because the source hash changed
103
+ 7. warm cache load again
104
+
105
+ Expected shape:
106
+
107
+ ```text
108
+ Clearing cache: __cache__
109
+ Generating: parquet/A_TEST_1048576.parquet...
110
+ Reading: parquet/A_TEST_1048576.parquet...
111
+ Returning cached object: parquet/A_TEST_1048576.parquet sha=<first 40 chars>...
112
+ Generating: parquet/A_TEST_1048576.parquet...
113
+ Ignoring cache entry: parquet/A_TEST_1048576.parquet: stale source version
114
+ Reading: parquet/A_TEST_1048576.parquet...
115
+ Returning cached object: parquet/A_TEST_1048576.parquet sha=<first 40 chars>...
116
+ ```
117
+
118
+ ## Swarm Test
119
+
120
+ `swarm_file.py` tests a multi-client environment with process-level concurrency.
121
+ It mixes cache gets with source regeneration to simulate clients reading while
122
+ the source data changes.
123
+
124
+ Run the default swarm:
125
+
126
+ ```bash
127
+ uv run --no-cache --no-sync python -m nfscache.util.swarm_file
128
+ ```
129
+
130
+ Default behavior:
131
+
132
+ - 4 client processes
133
+ - 12 get waves
134
+ - 6 source regenerations
135
+ - generations are injected throughout the get waves
136
+ - final warm check after all waves complete
137
+
138
+ Useful smaller run:
139
+
140
+ ```bash
141
+ uv run --no-cache --no-sync python -m nfscache.util.swarm_file \
142
+ --clients 3 \
143
+ --generators 1 \
144
+ --gets-per-client 6 \
145
+ --generations 3 \
146
+ --n-rows 1024 \
147
+ --cols 6 \
148
+ --n-int-cols 2 \
149
+ --n-str-cols 1 \
150
+ --data-dir /tmp/parquet-nfs-wave-swarm-parquet \
151
+ --cache-dir /tmp/parquet-nfs-wave-swarm-cache
152
+ ```
153
+
154
+ Swarm output includes:
155
+
156
+ - source generation hash
157
+ - cold `Reading: ...` reloads after invalidation
158
+ - warm `Returning cached object: ... sha=...` hits
159
+ - final multi-client warm check
160
+
161
+ ## SQL Swarm Test
162
+
163
+ `swarm_sql.py` tests the same process-level concurrency path for Oracle-backed
164
+ SQL reads. It creates an Oracle table, runs client reads through `@nfscache.sql`,
165
+ and rewrites the table between read waves so the cache has to invalidate and
166
+ reload under load.
167
+
168
+ Start Oracle first, then run:
169
+
170
+ ```bash
171
+ uv run --no-cache --no-sync python -m nfscache.util.swarm_sql
172
+ ```
173
+
174
+ Useful smaller run:
175
+
176
+ ```bash
177
+ uv run --no-cache --no-sync python -m nfscache.util.swarm_sql \
178
+ --clients 2 \
179
+ --writers 1 \
180
+ --gets-per-client 3 \
181
+ --generations 2 \
182
+ --n-rows 128 \
183
+ --batch-size 64 \
184
+ --table SWARM_SQL_TEST \
185
+ --cache-dir /tmp/parquet-nfs-swarm-sql-cache
186
+ ```
187
+
188
+ SQL swarm output includes Oracle cold reads, writer SCNs, stale SQL cache
189
+ invalidation, warm cache hits, and a final multi-client warm check.
190
+
191
+ ## Tests
192
+
193
+ Run focused unit tests:
194
+
195
+ ```bash
196
+ uv run --no-cache --no-sync python -m unittest discover -s tests
197
+ ```
198
+
199
+ The tests cover authoritative metadata, corrupted metadata/parquet recovery,
200
+ normalized SQL metadata, overlapping warm readers, and writer-preference
201
+ locking. A syntax check for all modules:
202
+
203
+ ```bash
204
+ uv run --no-cache --no-sync python -m compileall -q nfscache tests
205
+ ```
206
+
207
+ ## Generate Parquets
208
+
209
+ Generate or replace test parquet files:
210
+
211
+ ```bash
212
+ uv run --no-cache --no-sync python -m nfscache.util.generate_parquets
213
+ ```
214
+
215
+ The generator writes to a unique `*.part` file and atomically replaces the final
216
+ parquet when the write is complete.
217
+
218
+ By default, content changes on every run. Use `--seed` for reproducible data:
219
+
220
+ ```bash
221
+ uv run --no-cache --no-sync python -m nfscache.util.generate_parquets --seed 123
222
+ ```
223
+
224
+ ## Oracle SQL Cache
225
+
226
+ Start the local Oracle demo container:
227
+
228
+ ```bash
229
+ ./build_and_run.sh [--wipe]
230
+ ```
231
+
232
+ Populate the demo table:
233
+
234
+ ```bash
235
+ uv run --no-cache --no-sync python -m nfscache.database.oracle_write_container
236
+ ```
237
+
238
+ Read through the SQL cache:
239
+
240
+ ```bash
241
+ uv run --no-cache --no-sync python -m nfscache.database.oracle_read "select * from DATA_CONTAINER_DEMO"
242
+ ```
243
+
244
+ SQL cache keys use normalized SQL plus requested columns. Metadata stores the
245
+ normalized `source_sql`, and source versions use `COUNT(*)` plus
246
+ `MAX(ORA_ROWSCN)` for the detected `FROM` table.
247
+
248
+ ## Production Notes
249
+
250
+ This is not yet production-grade enterprise software.
251
+
252
+ For Oracle on NFS with many clients, the next important pieces are:
253
+
254
+ - validate `mkdir` lock tokens, writer intent, stale-lock recovery, and
255
+ `os.replace` semantics on the actual NFS mount
256
+ - tie long Oracle reads to a documented consistent SCN/snapshot strategy
257
+ - add structured logs and metrics for hit/miss/reload, reader/writer lock wait,
258
+ cold load duration, parquet write/read duration, and corruption/retry counts
259
+ - broaden automated failure tests for crashed lock holders, corrupted files,
260
+ source changes during cold load, and multi-host NFS integration
261
+ - add operational controls for cache retention, quotas, old `*.part` cleanup,
262
+ version migration, compression, permissions, and bad-key runbooks
@@ -0,0 +1,80 @@
1
+ #!/bin/bash
2
+ # build_and_run.sh - build the Oracle test image and start a fresh container from it.
3
+ set -euo pipefail  # abort on a failed command, an unset variable, or a failed pipeline stage
4
+
5
+ IMAGE="oracle-scn-test"  # tag for the locally built image
6
+ CONTAINER="oracle-scn"  # container name; any existing container with this name is removed below
7
+ VOLUME="oracle_scn_data"  # Docker volume mounted at /opt/oracle/oradata
8
+ ORACLE_PASSWORD="${ORACLE_PASSWORD:-AdminPassword123}"  # environment value wins; otherwise this default
9
+ APP_USER="SOMEUSER"  # only echoed in the final summary
10
+ APP_PASSWORD="cache"  # only echoed in the final summary
11
+ WIPE=0  # set to 1 by --wipe
12
+
13
+ usage() {  # print command-line help to stdout; callers redirect it when needed
14
+ echo "Usage: $0 [--wipe]"
15
+ echo
16
+ echo " --wipe Remove the existing $VOLUME Docker volume before starting Oracle."
17
+ echo
18
+ echo "Environment:"
19
+ echo " ORACLE_PLATFORM=linux/amd64 Optional Docker platform override."
20
+ }  # NOTE: ORACLE_PASSWORD is also read from the environment but is not listed in this help text
21
+
22
+ while [[ $# -gt 0 ]]; do  # consume the arguments one at a time
23
+ case "$1" in
24
+ --wipe)
25
+ WIPE=1  # the volume itself is removed later, after the old container is gone
26
+ ;;
27
+ -h|--help)
28
+ usage
29
+ exit 0
30
+ ;;
31
+ *)
32
+ echo "Unknown argument: $1" >&2
33
+ usage >&2  # help goes to stderr on the error path
34
+ exit 2  # non-zero status for a usage error
35
+ ;;
36
+ esac
37
+ shift
38
+ done
39
+
40
+ if [[ -n "${ORACLE_PLATFORM:-}" ]]; then  # ":-" keeps "set -u" from failing when the variable is unset
41
+ docker build --platform="$ORACLE_PLATFORM" -t "$IMAGE" .
42
+ else
43
+ docker build -t "$IMAGE" .  # build context is the current working directory
44
+ fi
45
+
46
+ # If a container with this name already exists (running or stopped), remove it; the volume is kept unless --wipe was given
47
+ if docker ps -a --format '{{.Names}}' | grep -qx "$CONTAINER"; then  # -x: match the whole name, not a substring
48
+ docker rm -f "$CONTAINER"
49
+ fi
50
+
51
+ if [[ "$WIPE" == "1" ]] && docker volume inspect "$VOLUME" >/dev/null 2>&1; then  # only remove a volume that exists
52
+ docker volume rm "$VOLUME"
53
+ fi
54
+
55
+ if [[ -n "${ORACLE_PLATFORM:-}" ]]; then  # run with the same platform override used for the build
56
+ docker run -d --name "$CONTAINER" \
57
+ --platform="$ORACLE_PLATFORM" \
58
+ -p 1521:1521 \
59
+ --shm-size=1g \
60
+ -e ORACLE_PASSWORD="$ORACLE_PASSWORD" \
61
+ -v "$VOLUME":/opt/oracle/oradata \
62
+ "$IMAGE"
63
+ else  # identical invocation without --platform
64
+ docker run -d --name "$CONTAINER" \
65
+ -p 1521:1521 \
66
+ --shm-size=1g \
67
+ -e ORACLE_PASSWORD="$ORACLE_PASSWORD" \
68
+ -v "$VOLUME":/opt/oracle/oradata \
69
+ "$IMAGE"
70
+ fi
71
+
72
+ echo "Oracle container started:"  # printed as soon as "docker run -d" returns; the script does not wait for the health check
73
+ echo " name: $CONTAINER"
74
+ echo " image: $IMAGE"
75
+ echo " port: 1521"
76
+ echo " service: FREEPDB1"  # hard-coded service name
77
+ echo " volume: $VOLUME"
78
+ echo " platform: ${ORACLE_PLATFORM:-native}"  # "native" when no override is set
79
+ echo " user: $APP_USER"
80
+ echo " pass: $APP_PASSWORD"
@@ -0,0 +1,13 @@
1
+ -- Run as SYS (per image behavior); switch the session to the FREEPDB1 container first
2
+ ALTER SESSION SET CONTAINER=FREEPDB1;
3
+
4
+ -- App user for SCN-based cache tests
5
+ CREATE USER SOMEUSER IDENTIFIED BY cache
6
+ DEFAULT TABLESPACE USERS
7
+ QUOTA UNLIMITED ON USERS;
8
+
9
+ GRANT CREATE SESSION TO SOMEUSER;
10
+ GRANT CREATE TABLE, CREATE SEQUENCE, CREATE PROCEDURE, CREATE VIEW, CREATE TRIGGER TO SOMEUSER;
11
+
12
+ -- Lets the app user run: SELECT current_scn FROM v$database
13
+ GRANT SELECT ON V_$DATABASE TO SOMEUSER;
@@ -0,0 +1,3 @@
1
+ from nfscache.nfs_cache import NFSCache
2
+
3
+ __all__ = ["NFSCache"]  # NFSCache is the only name exported at package level