jsonjsdb 0.7.4__tar.gz → 0.8.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {jsonjsdb-0.7.4 → jsonjsdb-0.8.0}/PKG-INFO +30 -7
- {jsonjsdb-0.7.4 → jsonjsdb-0.8.0}/README.md +29 -6
- {jsonjsdb-0.7.4 → jsonjsdb-0.8.0}/pyproject.toml +1 -1
- {jsonjsdb-0.7.4 → jsonjsdb-0.8.0}/src/jsonjsdb/database.py +11 -1
- {jsonjsdb-0.7.4 → jsonjsdb-0.8.0}/src/jsonjsdb/evolution.py +105 -14
- {jsonjsdb-0.7.4 → jsonjsdb-0.8.0}/.gitignore +0 -0
- {jsonjsdb-0.7.4 → jsonjsdb-0.8.0}/LICENSE +0 -0
- {jsonjsdb-0.7.4 → jsonjsdb-0.8.0}/src/jsonjsdb/__init__.py +0 -0
- {jsonjsdb-0.7.4 → jsonjsdb-0.8.0}/src/jsonjsdb/loader.py +0 -0
- {jsonjsdb-0.7.4 → jsonjsdb-0.8.0}/src/jsonjsdb/py.typed +0 -0
- {jsonjsdb-0.7.4 → jsonjsdb-0.8.0}/src/jsonjsdb/table.py +0 -0
- {jsonjsdb-0.7.4 → jsonjsdb-0.8.0}/src/jsonjsdb/types.py +0 -0
- {jsonjsdb-0.7.4 → jsonjsdb-0.8.0}/src/jsonjsdb/writer.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: jsonjsdb
|
|
3
|
-
Version: 0.7.4
|
|
3
|
+
Version: 0.8.0
|
|
4
4
|
Summary: Python library for JSONJS database loading
|
|
5
5
|
Project-URL: Homepage, https://github.com/datannur/jsonjsdb
|
|
6
6
|
Project-URL: Repository, https://github.com/datannur/jsonjsdb
|
|
@@ -182,6 +182,27 @@ db.save(evolution_xlsx=Path("path/to/evolution.xlsx"))
|
|
|
182
182
|
db.save(timestamp=1741186800)
|
|
183
183
|
```
|
|
184
184
|
|
|
185
|
+
#### Cascade Filtering
|
|
186
|
+
|
|
187
|
+
When a parent entity is added or deleted, all child entities are also added/deleted.
|
|
188
|
+
By default, this creates noise in the evolution log. Use `parent_relations` to automatically
|
|
189
|
+
filter out cascade entries:
|
|
190
|
+
|
|
191
|
+
```python
|
|
192
|
+
db.save(
|
|
193
|
+
parent_relations={
|
|
194
|
+
"variable": "dataset", # variable.dataset_id → dataset
|
|
195
|
+
"freq": "variable", # freq.variable_id → variable
|
|
196
|
+
}
|
|
197
|
+
)
|
|
198
|
+
```
|
|
199
|
+
|
|
200
|
+
With cascade filtering:
|
|
201
|
+
- Adding a dataset with 50 variables logs only 1 entry (the dataset add)
|
|
202
|
+
- Deleting a dataset logs only the parent delete, not all child deletes
|
|
203
|
+
- Updates are always logged (no filtering)
|
|
204
|
+
- Explicit child additions (to existing parent) are still logged
|
|
205
|
+
|
|
185
206
|
When `evolution_xlsx` is provided:
|
|
186
207
|
- The xlsx file becomes the source of truth (read from xlsx if it exists)
|
|
187
208
|
- User edits made in Excel are preserved on subsequent saves
|
|
@@ -196,6 +217,7 @@ Evolution format:
|
|
|
196
217
|
"entity": "user",
|
|
197
218
|
"entity_id": "user_2",
|
|
198
219
|
"parent_entity_id": null,
|
|
220
|
+
"parent_entity": null,
|
|
199
221
|
"variable": null,
|
|
200
222
|
"old_value": null,
|
|
201
223
|
"new_value": null,
|
|
@@ -204,12 +226,13 @@ Evolution format:
|
|
|
204
226
|
{
|
|
205
227
|
"timestamp": 1741186800,
|
|
206
228
|
"type": "update",
|
|
207
|
-
"entity": "
|
|
208
|
-
"entity_id": "
|
|
209
|
-
"parent_entity_id":
|
|
210
|
-
"
|
|
211
|
-
"
|
|
212
|
-
"
|
|
229
|
+
"entity": "variable",
|
|
230
|
+
"entity_id": "var_1",
|
|
231
|
+
"parent_entity_id": "ds_1",
|
|
232
|
+
"parent_entity": "dataset",
|
|
233
|
+
"variable": "name",
|
|
234
|
+
"old_value": "Old Name",
|
|
235
|
+
"new_value": "New Name",
|
|
213
236
|
"name": null
|
|
214
237
|
}
|
|
215
238
|
]
|
|
@@ -157,6 +157,27 @@ db.save(evolution_xlsx=Path("path/to/evolution.xlsx"))
|
|
|
157
157
|
db.save(timestamp=1741186800)
|
|
158
158
|
```
|
|
159
159
|
|
|
160
|
+
#### Cascade Filtering
|
|
161
|
+
|
|
162
|
+
When a parent entity is added or deleted, all child entities are also added/deleted.
|
|
163
|
+
By default, this creates noise in the evolution log. Use `parent_relations` to automatically
|
|
164
|
+
filter out cascade entries:
|
|
165
|
+
|
|
166
|
+
```python
|
|
167
|
+
db.save(
|
|
168
|
+
parent_relations={
|
|
169
|
+
"variable": "dataset", # variable.dataset_id → dataset
|
|
170
|
+
"freq": "variable", # freq.variable_id → variable
|
|
171
|
+
}
|
|
172
|
+
)
|
|
173
|
+
```
|
|
174
|
+
|
|
175
|
+
With cascade filtering:
|
|
176
|
+
- Adding a dataset with 50 variables logs only 1 entry (the dataset add)
|
|
177
|
+
- Deleting a dataset logs only the parent delete, not all child deletes
|
|
178
|
+
- Updates are always logged (no filtering)
|
|
179
|
+
- Explicit child additions (to existing parent) are still logged
|
|
180
|
+
|
|
160
181
|
When `evolution_xlsx` is provided:
|
|
161
182
|
- The xlsx file becomes the source of truth (read from xlsx if it exists)
|
|
162
183
|
- User edits made in Excel are preserved on subsequent saves
|
|
@@ -171,6 +192,7 @@ Evolution format:
|
|
|
171
192
|
"entity": "user",
|
|
172
193
|
"entity_id": "user_2",
|
|
173
194
|
"parent_entity_id": null,
|
|
195
|
+
"parent_entity": null,
|
|
174
196
|
"variable": null,
|
|
175
197
|
"old_value": null,
|
|
176
198
|
"new_value": null,
|
|
@@ -179,12 +201,13 @@ Evolution format:
|
|
|
179
201
|
{
|
|
180
202
|
"timestamp": 1741186800,
|
|
181
203
|
"type": "update",
|
|
182
|
-
"entity": "
|
|
183
|
-
"entity_id": "
|
|
184
|
-
"parent_entity_id":
|
|
185
|
-
"
|
|
186
|
-
"
|
|
187
|
-
"
|
|
204
|
+
"entity": "variable",
|
|
205
|
+
"entity_id": "var_1",
|
|
206
|
+
"parent_entity_id": "ds_1",
|
|
207
|
+
"parent_entity": "dataset",
|
|
208
|
+
"variable": "name",
|
|
209
|
+
"old_value": "Old Name",
|
|
210
|
+
"new_value": "New Name",
|
|
188
211
|
"name": null
|
|
189
212
|
}
|
|
190
213
|
]
|
|
@@ -10,6 +10,7 @@ import polars as pl
|
|
|
10
10
|
from .evolution import (
|
|
11
11
|
EvolutionEntry,
|
|
12
12
|
compare_datasets,
|
|
13
|
+
filter_cascade_entries,
|
|
13
14
|
get_timestamp,
|
|
14
15
|
load_evolution,
|
|
15
16
|
save_evolution,
|
|
@@ -108,6 +109,7 @@ class Jsonjsdb:
|
|
|
108
109
|
evolution_xlsx: Path | str | None = None,
|
|
109
110
|
timestamp: int | None = None,
|
|
110
111
|
write_js: bool = True,
|
|
112
|
+
parent_relations: dict[str, str] | None = None,
|
|
111
113
|
) -> None:
|
|
112
114
|
"""Save all tables to disk with optional evolution tracking.
|
|
113
115
|
|
|
@@ -120,6 +122,8 @@ class Jsonjsdb:
|
|
|
120
122
|
evolution_xlsx: Optional path for evolution.xlsx output
|
|
121
123
|
timestamp: Optional timestamp override for deterministic outputs
|
|
122
124
|
write_js: If True, write both .json and .json.js (default: True)
|
|
125
|
+
parent_relations: Child->parent table mapping for cascade filtering
|
|
126
|
+
Example: {"variable": "dataset", "freq": "variable"}
|
|
123
127
|
"""
|
|
124
128
|
save_path = Path(path) if path else self._path
|
|
125
129
|
|
|
@@ -147,7 +151,9 @@ class Jsonjsdb:
|
|
|
147
151
|
# Track evolution if enabled
|
|
148
152
|
if track_evolution:
|
|
149
153
|
old_df = self._get_old_table(save_path, name, same_path)
|
|
150
|
-
entries = compare_datasets(
|
|
154
|
+
entries = compare_datasets(
|
|
155
|
+
old_df, persistable_df, ts, name, parent_relations
|
|
156
|
+
)
|
|
151
157
|
new_entries.extend(entries)
|
|
152
158
|
|
|
153
159
|
write_table_json(persistable_df, save_path / f"{name}.json")
|
|
@@ -159,6 +165,10 @@ class Jsonjsdb:
|
|
|
159
165
|
self._original_snapshots[name] = persistable_df.clone()
|
|
160
166
|
|
|
161
167
|
# Save evolution if there are new entries
|
|
168
|
+
if track_evolution and new_entries:
|
|
169
|
+
# Filter cascade entries (child add/delete when parent has same operation)
|
|
170
|
+
new_entries = filter_cascade_entries(new_entries)
|
|
171
|
+
|
|
162
172
|
if track_evolution and new_entries:
|
|
163
173
|
xlsx_path = Path(evolution_xlsx) if evolution_xlsx else None
|
|
164
174
|
existing_entries = load_evolution(save_path, xlsx_path)
|
|
@@ -27,6 +27,7 @@ class EvolutionEntry:
|
|
|
27
27
|
entity: str
|
|
28
28
|
entity_id: str | int
|
|
29
29
|
parent_entity_id: str | int | None
|
|
30
|
+
parent_entity: str | None
|
|
30
31
|
variable: str | None
|
|
31
32
|
old_value: Any
|
|
32
33
|
new_value: Any
|
|
@@ -45,13 +46,40 @@ def _standardize_id(id_value: str) -> str:
|
|
|
45
46
|
return INVALID_ID_PATTERN.sub("", trimmed)
|
|
46
47
|
|
|
47
48
|
|
|
48
|
-
def
|
|
49
|
-
|
|
49
|
+
def _get_parent_info(
|
|
50
|
+
row: dict[str, Any],
|
|
51
|
+
entity: str,
|
|
52
|
+
parent_relations: dict[str, str] | None,
|
|
53
|
+
) -> tuple[str | None, str | int | None]:
|
|
54
|
+
"""Get parent entity and id based on config or FK convention.
|
|
55
|
+
|
|
56
|
+
Args:
|
|
57
|
+
row: Row data
|
|
58
|
+
entity: Current entity/table name
|
|
59
|
+
parent_relations: Mapping of child_table -> parent_table
|
|
60
|
+
|
|
61
|
+
Returns:
|
|
62
|
+
Tuple of (parent_entity, parent_entity_id)
|
|
63
|
+
"""
|
|
64
|
+
if parent_relations and entity in parent_relations:
|
|
65
|
+
parent_entity = parent_relations[entity]
|
|
66
|
+
fk_col = f"{parent_entity}_id"
|
|
67
|
+
parent_id = row.get(fk_col)
|
|
68
|
+
if isinstance(parent_id, (str, int)):
|
|
69
|
+
return (parent_entity, parent_id)
|
|
70
|
+
return (parent_entity, None)
|
|
71
|
+
|
|
72
|
+
# Fallback: auto-detect from first FK column
|
|
50
73
|
for key, value in row.items():
|
|
51
|
-
if key.endswith("_id")
|
|
74
|
+
if key.endswith("_id") and key != "id":
|
|
75
|
+
parent_entity = key[:-3] # strip "_id"
|
|
52
76
|
if isinstance(value, (str, int)):
|
|
53
|
-
return value
|
|
54
|
-
|
|
77
|
+
return (parent_entity, value)
|
|
78
|
+
elif key.endswith("Id"):
|
|
79
|
+
parent_entity = key[:-2] # strip "Id"
|
|
80
|
+
if isinstance(value, (str, int)):
|
|
81
|
+
return (parent_entity, value)
|
|
82
|
+
return (None, None)
|
|
55
83
|
|
|
56
84
|
|
|
57
85
|
def _add_composite_id_if_missing(df: pl.DataFrame) -> tuple[pl.DataFrame, bool]:
|
|
@@ -109,6 +137,7 @@ def compare_datasets(
|
|
|
109
137
|
new_df: pl.DataFrame,
|
|
110
138
|
timestamp: int,
|
|
111
139
|
entity: str,
|
|
140
|
+
parent_relations: dict[str, str] | None = None,
|
|
112
141
|
) -> list[EvolutionEntry]:
|
|
113
142
|
"""Compare two datasets and return list of evolution entries.
|
|
114
143
|
|
|
@@ -117,6 +146,7 @@ def compare_datasets(
|
|
|
117
146
|
new_df: New version of the data
|
|
118
147
|
timestamp: Unix timestamp in seconds
|
|
119
148
|
entity: Table/entity name
|
|
149
|
+
parent_relations: Mapping of child_table -> parent_table for cascade filtering
|
|
120
150
|
|
|
121
151
|
Returns:
|
|
122
152
|
List of EvolutionEntry objects describing the changes
|
|
@@ -130,8 +160,6 @@ def compare_datasets(
|
|
|
130
160
|
if old_df.is_empty():
|
|
131
161
|
return entries
|
|
132
162
|
|
|
133
|
-
# Normalize IDs for consistent comparison
|
|
134
|
-
|
|
135
163
|
# Normalize id columns to string for consistent comparison
|
|
136
164
|
old_df = _normalize_id_column(old_df)
|
|
137
165
|
new_df = _normalize_id_column(new_df)
|
|
@@ -174,6 +202,9 @@ def compare_datasets(
|
|
|
174
202
|
if _values_are_empty(old_value, new_value):
|
|
175
203
|
continue
|
|
176
204
|
|
|
205
|
+
parent_entity, parent_id = _get_parent_info(
|
|
206
|
+
obj_new, entity, parent_relations
|
|
207
|
+
)
|
|
177
208
|
entries.append(
|
|
178
209
|
EvolutionEntry(
|
|
179
210
|
timestamp=timestamp,
|
|
@@ -185,8 +216,11 @@ def compare_datasets(
|
|
|
185
216
|
else entity_id
|
|
186
217
|
),
|
|
187
218
|
parent_entity_id=(
|
|
188
|
-
str(entity_id).split("---")[0]
|
|
219
|
+
str(entity_id).split("---")[0]
|
|
220
|
+
if has_composite_id
|
|
221
|
+
else parent_id
|
|
189
222
|
),
|
|
223
|
+
parent_entity=None if has_composite_id else parent_entity,
|
|
190
224
|
variable=variable,
|
|
191
225
|
old_value=old_value,
|
|
192
226
|
new_value=new_value,
|
|
@@ -197,6 +231,7 @@ def compare_datasets(
|
|
|
197
231
|
# Detect additions
|
|
198
232
|
for entity_id in ids_added:
|
|
199
233
|
obj_new = map_new[entity_id]
|
|
234
|
+
parent_entity, parent_id = _get_parent_info(obj_new, entity, parent_relations)
|
|
200
235
|
entries.append(
|
|
201
236
|
EvolutionEntry(
|
|
202
237
|
timestamp=timestamp,
|
|
@@ -206,8 +241,9 @@ def compare_datasets(
|
|
|
206
241
|
_standardize_id(str(entity_id)) if has_composite_id else entity_id
|
|
207
242
|
),
|
|
208
243
|
parent_entity_id=(
|
|
209
|
-
str(entity_id).split("---")[0] if has_composite_id else
|
|
244
|
+
str(entity_id).split("---")[0] if has_composite_id else parent_id
|
|
210
245
|
),
|
|
246
|
+
parent_entity=None if has_composite_id else parent_entity,
|
|
211
247
|
variable=None,
|
|
212
248
|
old_value=None,
|
|
213
249
|
new_value=None,
|
|
@@ -218,6 +254,7 @@ def compare_datasets(
|
|
|
218
254
|
# Detect deletions
|
|
219
255
|
for entity_id in ids_removed:
|
|
220
256
|
obj_old = map_old[entity_id]
|
|
257
|
+
parent_entity, parent_id = _get_parent_info(obj_old, entity, parent_relations)
|
|
221
258
|
entries.append(
|
|
222
259
|
EvolutionEntry(
|
|
223
260
|
timestamp=timestamp,
|
|
@@ -226,7 +263,10 @@ def compare_datasets(
|
|
|
226
263
|
entity_id=(
|
|
227
264
|
_standardize_id(str(entity_id)) if has_composite_id else entity_id
|
|
228
265
|
),
|
|
229
|
-
parent_entity_id=
|
|
266
|
+
parent_entity_id=(
|
|
267
|
+
str(entity_id).split("---")[0] if has_composite_id else parent_id
|
|
268
|
+
),
|
|
269
|
+
parent_entity=None if has_composite_id else parent_entity,
|
|
230
270
|
variable=None,
|
|
231
271
|
old_value=None,
|
|
232
272
|
new_value=None,
|
|
@@ -241,6 +281,51 @@ def compare_datasets(
|
|
|
241
281
|
return entries
|
|
242
282
|
|
|
243
283
|
|
|
284
|
+
def filter_cascade_entries(entries: list[EvolutionEntry]) -> list[EvolutionEntry]:
|
|
285
|
+
"""Filter out cascade add/delete entries where parent has same operation.
|
|
286
|
+
|
|
287
|
+
When a parent entity is added or deleted, child entities are also added/deleted.
|
|
288
|
+
This function removes child entries that are part of a cascade operation,
|
|
289
|
+
keeping only the meaningful parent-level changes.
|
|
290
|
+
|
|
291
|
+
Args:
|
|
292
|
+
entries: List of evolution entries to filter
|
|
293
|
+
|
|
294
|
+
Returns:
|
|
295
|
+
Filtered list with cascade entries removed
|
|
296
|
+
"""
|
|
297
|
+
# Index parent operations: (timestamp, type, entity, entity_id)
|
|
298
|
+
parent_ops: set[tuple[int, str, str, str]] = {
|
|
299
|
+
(e.timestamp, e.type, e.entity, str(e.entity_id))
|
|
300
|
+
for e in entries
|
|
301
|
+
if e.type in ("add", "delete")
|
|
302
|
+
}
|
|
303
|
+
|
|
304
|
+
result: list[EvolutionEntry] = []
|
|
305
|
+
for entry in entries:
|
|
306
|
+
# Always keep updates
|
|
307
|
+
if entry.type == "update":
|
|
308
|
+
result.append(entry)
|
|
309
|
+
continue
|
|
310
|
+
|
|
311
|
+
# Keep entries without parent relation
|
|
312
|
+
if not entry.parent_entity or entry.parent_entity_id is None:
|
|
313
|
+
result.append(entry)
|
|
314
|
+
continue
|
|
315
|
+
|
|
316
|
+
# Check if parent has the same operation in this batch
|
|
317
|
+
parent_key = (
|
|
318
|
+
entry.timestamp,
|
|
319
|
+
entry.type,
|
|
320
|
+
entry.parent_entity,
|
|
321
|
+
str(entry.parent_entity_id),
|
|
322
|
+
)
|
|
323
|
+
if parent_key not in parent_ops:
|
|
324
|
+
result.append(entry)
|
|
325
|
+
|
|
326
|
+
return result
|
|
327
|
+
|
|
328
|
+
|
|
244
329
|
def load_evolution(path: Path, xlsx_path: Path | None = None) -> list[EvolutionEntry]:
|
|
245
330
|
"""Load existing evolution entries.
|
|
246
331
|
|
|
@@ -264,6 +349,7 @@ def load_evolution(path: Path, xlsx_path: Path | None = None) -> list[EvolutionE
|
|
|
264
349
|
entity=row["entity"],
|
|
265
350
|
entity_id=row["entity_id"],
|
|
266
351
|
parent_entity_id=row.get("parent_entity_id"),
|
|
352
|
+
parent_entity=row.get("parent_entity"),
|
|
267
353
|
variable=row.get("variable"),
|
|
268
354
|
old_value=row.get("old_value"),
|
|
269
355
|
new_value=row.get("new_value"),
|
|
@@ -303,10 +389,11 @@ def load_evolution_xlsx(xlsx_path: Path) -> list[EvolutionEntry]:
|
|
|
303
389
|
entity=str(row[2]) if row[2] else "",
|
|
304
390
|
entity_id=str(row[3]) if row[3] else "",
|
|
305
391
|
parent_entity_id=str(row[4]) if row[4] else None,
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
392
|
+
parent_entity=str(row[5]) if row[5] else None,
|
|
393
|
+
variable=str(row[6]) if row[6] else None,
|
|
394
|
+
old_value=row[7] if row[7] else None,
|
|
395
|
+
new_value=row[8] if row[8] else None,
|
|
396
|
+
name=str(row[9]) if len(row) > 9 and row[9] else None,
|
|
310
397
|
)
|
|
311
398
|
)
|
|
312
399
|
return entries
|
|
@@ -343,6 +430,7 @@ def save_evolution(
|
|
|
343
430
|
"entity",
|
|
344
431
|
"entity_id",
|
|
345
432
|
"parent_entity_id",
|
|
433
|
+
"parent_entity",
|
|
346
434
|
"variable",
|
|
347
435
|
"old_value",
|
|
348
436
|
"new_value",
|
|
@@ -356,6 +444,7 @@ def save_evolution(
|
|
|
356
444
|
entry.entity,
|
|
357
445
|
entry.entity_id,
|
|
358
446
|
entry.parent_entity_id,
|
|
447
|
+
entry.parent_entity,
|
|
359
448
|
entry.variable,
|
|
360
449
|
entry.old_value,
|
|
361
450
|
entry.new_value,
|
|
@@ -388,6 +477,7 @@ def write_evolution_xlsx(entries: list[EvolutionEntry], xlsx_path: Path) -> None
|
|
|
388
477
|
"entity",
|
|
389
478
|
"entity_id",
|
|
390
479
|
"parent_entity_id",
|
|
480
|
+
"parent_entity",
|
|
391
481
|
"variable",
|
|
392
482
|
"old_value",
|
|
393
483
|
"new_value",
|
|
@@ -404,6 +494,7 @@ def write_evolution_xlsx(entries: list[EvolutionEntry], xlsx_path: Path) -> None
|
|
|
404
494
|
entry.entity,
|
|
405
495
|
str(entry.entity_id) if entry.entity_id is not None else "",
|
|
406
496
|
str(entry.parent_entity_id) if entry.parent_entity_id else "",
|
|
497
|
+
str(entry.parent_entity) if entry.parent_entity else "",
|
|
407
498
|
str(entry.variable) if entry.variable else "",
|
|
408
499
|
str(entry.old_value) if entry.old_value is not None else "",
|
|
409
500
|
str(entry.new_value) if entry.new_value is not None else "",
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|