jsonjsdb 0.7.4__tar.gz → 0.8.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: jsonjsdb
3
- Version: 0.7.4
3
+ Version: 0.8.0
4
4
  Summary: Python library for JSONJS database loading
5
5
  Project-URL: Homepage, https://github.com/datannur/jsonjsdb
6
6
  Project-URL: Repository, https://github.com/datannur/jsonjsdb
@@ -182,6 +182,27 @@ db.save(evolution_xlsx=Path("path/to/evolution.xlsx"))
182
182
  db.save(timestamp=1741186800)
183
183
  ```
184
184
 
185
+ #### Cascade Filtering
186
+
187
+ When a parent entity is added or deleted, all of its child entities are added or deleted along with it.
188
+ By default, this creates noise in the evolution log. Use `parent_relations` to automatically
189
+ filter out cascade entries:
190
+
191
+ ```python
192
+ db.save(
193
+ parent_relations={
194
+ "variable": "dataset", # variable.dataset_id → dataset
195
+ "freq": "variable", # freq.variable_id → variable
196
+ }
197
+ )
198
+ ```
199
+
200
+ With cascade filtering:
201
+ - Adding a dataset with 50 variables logs only 1 entry (the dataset add)
202
+ - Deleting a dataset logs only the parent delete, not all child deletes
203
+ - Updates are always logged (no filtering)
204
+ - Explicit child additions (to an existing parent) are still logged
205
+
185
206
  When `evolution_xlsx` is provided:
186
207
  - The xlsx file becomes the source of truth (read from xlsx if it exists)
187
208
  - User edits made in Excel are preserved on subsequent saves
@@ -196,6 +217,7 @@ Evolution format:
196
217
  "entity": "user",
197
218
  "entity_id": "user_2",
198
219
  "parent_entity_id": null,
220
+ "parent_entity": null,
199
221
  "variable": null,
200
222
  "old_value": null,
201
223
  "new_value": null,
@@ -204,12 +226,13 @@ Evolution format:
204
226
  {
205
227
  "timestamp": 1741186800,
206
228
  "type": "update",
207
- "entity": "user",
208
- "entity_id": "user_1",
209
- "parent_entity_id": null,
210
- "variable": "score",
211
- "old_value": 100,
212
- "new_value": 200,
229
+ "entity": "variable",
230
+ "entity_id": "var_1",
231
+ "parent_entity_id": "ds_1",
232
+ "parent_entity": "dataset",
233
+ "variable": "name",
234
+ "old_value": "Old Name",
235
+ "new_value": "New Name",
213
236
  "name": null
214
237
  }
215
238
  ]
@@ -157,6 +157,27 @@ db.save(evolution_xlsx=Path("path/to/evolution.xlsx"))
157
157
  db.save(timestamp=1741186800)
158
158
  ```
159
159
 
160
+ #### Cascade Filtering
161
+
162
+ When a parent entity is added or deleted, all of its child entities are added or deleted along with it.
163
+ By default, this creates noise in the evolution log. Use `parent_relations` to automatically
164
+ filter out cascade entries:
165
+
166
+ ```python
167
+ db.save(
168
+ parent_relations={
169
+ "variable": "dataset", # variable.dataset_id → dataset
170
+ "freq": "variable", # freq.variable_id → variable
171
+ }
172
+ )
173
+ ```
174
+
175
+ With cascade filtering:
176
+ - Adding a dataset with 50 variables logs only 1 entry (the dataset add)
177
+ - Deleting a dataset logs only the parent delete, not all child deletes
178
+ - Updates are always logged (no filtering)
179
+ - Explicit child additions (to an existing parent) are still logged
180
+
160
181
  When `evolution_xlsx` is provided:
161
182
  - The xlsx file becomes the source of truth (read from xlsx if it exists)
162
183
  - User edits made in Excel are preserved on subsequent saves
@@ -171,6 +192,7 @@ Evolution format:
171
192
  "entity": "user",
172
193
  "entity_id": "user_2",
173
194
  "parent_entity_id": null,
195
+ "parent_entity": null,
174
196
  "variable": null,
175
197
  "old_value": null,
176
198
  "new_value": null,
@@ -179,12 +201,13 @@ Evolution format:
179
201
  {
180
202
  "timestamp": 1741186800,
181
203
  "type": "update",
182
- "entity": "user",
183
- "entity_id": "user_1",
184
- "parent_entity_id": null,
185
- "variable": "score",
186
- "old_value": 100,
187
- "new_value": 200,
204
+ "entity": "variable",
205
+ "entity_id": "var_1",
206
+ "parent_entity_id": "ds_1",
207
+ "parent_entity": "dataset",
208
+ "variable": "name",
209
+ "old_value": "Old Name",
210
+ "new_value": "New Name",
188
211
  "name": null
189
212
  }
190
213
  ]
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "jsonjsdb"
7
- version = "0.7.4"
7
+ version = "0.8.0"
8
8
  description = "Python library for JSONJS database loading"
9
9
  authors = [{ name = "datannur" }]
10
10
  readme = "README.md"
@@ -10,6 +10,7 @@ import polars as pl
10
10
  from .evolution import (
11
11
  EvolutionEntry,
12
12
  compare_datasets,
13
+ filter_cascade_entries,
13
14
  get_timestamp,
14
15
  load_evolution,
15
16
  save_evolution,
@@ -108,6 +109,7 @@ class Jsonjsdb:
108
109
  evolution_xlsx: Path | str | None = None,
109
110
  timestamp: int | None = None,
110
111
  write_js: bool = True,
112
+ parent_relations: dict[str, str] | None = None,
111
113
  ) -> None:
112
114
  """Save all tables to disk with optional evolution tracking.
113
115
 
@@ -120,6 +122,8 @@ class Jsonjsdb:
120
122
  evolution_xlsx: Optional path for evolution.xlsx output
121
123
  timestamp: Optional timestamp override for deterministic outputs
122
124
  write_js: If True, write both .json and .json.js (default: True)
125
+ parent_relations: Child->parent table mapping for cascade filtering
126
+ Example: {"variable": "dataset", "freq": "variable"}
123
127
  """
124
128
  save_path = Path(path) if path else self._path
125
129
 
@@ -147,7 +151,9 @@ class Jsonjsdb:
147
151
  # Track evolution if enabled
148
152
  if track_evolution:
149
153
  old_df = self._get_old_table(save_path, name, same_path)
150
- entries = compare_datasets(old_df, persistable_df, ts, name)
154
+ entries = compare_datasets(
155
+ old_df, persistable_df, ts, name, parent_relations
156
+ )
151
157
  new_entries.extend(entries)
152
158
 
153
159
  write_table_json(persistable_df, save_path / f"{name}.json")
@@ -159,6 +165,10 @@ class Jsonjsdb:
159
165
  self._original_snapshots[name] = persistable_df.clone()
160
166
 
161
167
  # Save evolution if there are new entries
168
+ if track_evolution and new_entries:
169
+ # Filter cascade entries (child add/delete when parent has same operation)
170
+ new_entries = filter_cascade_entries(new_entries)
171
+
162
172
  if track_evolution and new_entries:
163
173
  xlsx_path = Path(evolution_xlsx) if evolution_xlsx else None
164
174
  existing_entries = load_evolution(save_path, xlsx_path)
@@ -27,6 +27,7 @@ class EvolutionEntry:
27
27
  entity: str
28
28
  entity_id: str | int
29
29
  parent_entity_id: str | int | None
30
+ parent_entity: str | None
30
31
  variable: str | None
31
32
  old_value: Any
32
33
  new_value: Any
@@ -45,13 +46,40 @@ def _standardize_id(id_value: str) -> str:
45
46
  return INVALID_ID_PATTERN.sub("", trimmed)
46
47
 
47
48
 
48
- def _get_first_parent_id(row: dict[str, Any]) -> str | int | None:
49
- """Find the first foreign key column (_id or Id suffix) and return its value."""
49
+ def _get_parent_info(
50
+ row: dict[str, Any],
51
+ entity: str,
52
+ parent_relations: dict[str, str] | None,
53
+ ) -> tuple[str | None, str | int | None]:
54
+ """Get parent entity and id based on config or FK convention.
55
+
56
+ Args:
57
+ row: Row data
58
+ entity: Current entity/table name
59
+ parent_relations: Mapping of child_table -> parent_table
60
+
61
+ Returns:
62
+ Tuple of (parent_entity, parent_entity_id)
63
+ """
64
+ if parent_relations and entity in parent_relations:
65
+ parent_entity = parent_relations[entity]
66
+ fk_col = f"{parent_entity}_id"
67
+ parent_id = row.get(fk_col)
68
+ if isinstance(parent_id, (str, int)):
69
+ return (parent_entity, parent_id)
70
+ return (parent_entity, None)
71
+
72
+ # Fallback: auto-detect from first FK column
50
73
  for key, value in row.items():
51
- if key.endswith("_id") or key.endswith("Id"):
74
+ if key.endswith("_id") and key != "id":
75
+ parent_entity = key[:-3] # strip "_id"
52
76
  if isinstance(value, (str, int)):
53
- return value
54
- return None
77
+ return (parent_entity, value)
78
+ elif key.endswith("Id"):
79
+ parent_entity = key[:-2] # strip "Id"
80
+ if isinstance(value, (str, int)):
81
+ return (parent_entity, value)
82
+ return (None, None)
55
83
 
56
84
 
57
85
  def _add_composite_id_if_missing(df: pl.DataFrame) -> tuple[pl.DataFrame, bool]:
@@ -109,6 +137,7 @@ def compare_datasets(
109
137
  new_df: pl.DataFrame,
110
138
  timestamp: int,
111
139
  entity: str,
140
+ parent_relations: dict[str, str] | None = None,
112
141
  ) -> list[EvolutionEntry]:
113
142
  """Compare two datasets and return list of evolution entries.
114
143
 
@@ -117,6 +146,7 @@ def compare_datasets(
117
146
  new_df: New version of the data
118
147
  timestamp: Unix timestamp in seconds
119
148
  entity: Table/entity name
149
+ parent_relations: Mapping of child_table -> parent_table for cascade filtering
120
150
 
121
151
  Returns:
122
152
  List of EvolutionEntry objects describing the changes
@@ -130,8 +160,6 @@ def compare_datasets(
130
160
  if old_df.is_empty():
131
161
  return entries
132
162
 
133
- # Normalize IDs for consistent comparison
134
-
135
163
  # Normalize id columns to string for consistent comparison
136
164
  old_df = _normalize_id_column(old_df)
137
165
  new_df = _normalize_id_column(new_df)
@@ -174,6 +202,9 @@ def compare_datasets(
174
202
  if _values_are_empty(old_value, new_value):
175
203
  continue
176
204
 
205
+ parent_entity, parent_id = _get_parent_info(
206
+ obj_new, entity, parent_relations
207
+ )
177
208
  entries.append(
178
209
  EvolutionEntry(
179
210
  timestamp=timestamp,
@@ -185,8 +216,11 @@ def compare_datasets(
185
216
  else entity_id
186
217
  ),
187
218
  parent_entity_id=(
188
- str(entity_id).split("---")[0] if has_composite_id else None
219
+ str(entity_id).split("---")[0]
220
+ if has_composite_id
221
+ else parent_id
189
222
  ),
223
+ parent_entity=None if has_composite_id else parent_entity,
190
224
  variable=variable,
191
225
  old_value=old_value,
192
226
  new_value=new_value,
@@ -197,6 +231,7 @@ def compare_datasets(
197
231
  # Detect additions
198
232
  for entity_id in ids_added:
199
233
  obj_new = map_new[entity_id]
234
+ parent_entity, parent_id = _get_parent_info(obj_new, entity, parent_relations)
200
235
  entries.append(
201
236
  EvolutionEntry(
202
237
  timestamp=timestamp,
@@ -206,8 +241,9 @@ def compare_datasets(
206
241
  _standardize_id(str(entity_id)) if has_composite_id else entity_id
207
242
  ),
208
243
  parent_entity_id=(
209
- str(entity_id).split("---")[0] if has_composite_id else None
244
+ str(entity_id).split("---")[0] if has_composite_id else parent_id
210
245
  ),
246
+ parent_entity=None if has_composite_id else parent_entity,
211
247
  variable=None,
212
248
  old_value=None,
213
249
  new_value=None,
@@ -218,6 +254,7 @@ def compare_datasets(
218
254
  # Detect deletions
219
255
  for entity_id in ids_removed:
220
256
  obj_old = map_old[entity_id]
257
+ parent_entity, parent_id = _get_parent_info(obj_old, entity, parent_relations)
221
258
  entries.append(
222
259
  EvolutionEntry(
223
260
  timestamp=timestamp,
@@ -226,7 +263,10 @@ def compare_datasets(
226
263
  entity_id=(
227
264
  _standardize_id(str(entity_id)) if has_composite_id else entity_id
228
265
  ),
229
- parent_entity_id=_get_first_parent_id(obj_old),
266
+ parent_entity_id=(
267
+ str(entity_id).split("---")[0] if has_composite_id else parent_id
268
+ ),
269
+ parent_entity=None if has_composite_id else parent_entity,
230
270
  variable=None,
231
271
  old_value=None,
232
272
  new_value=None,
@@ -241,6 +281,51 @@ def compare_datasets(
241
281
  return entries
242
282
 
243
283
 
284
+ def filter_cascade_entries(entries: list[EvolutionEntry]) -> list[EvolutionEntry]:
285
+ """Filter out cascade add/delete entries where parent has same operation.
286
+
287
+ When a parent entity is added or deleted, child entities are also added/deleted.
288
+ This function removes child entries that are part of a cascade operation,
289
+ keeping only the meaningful parent-level changes.
290
+
291
+ Args:
292
+ entries: List of evolution entries to filter
293
+
294
+ Returns:
295
+ Filtered list with cascade entries removed
296
+ """
297
+ # Index parent operations: (timestamp, type, entity, entity_id)
298
+ parent_ops: set[tuple[int, str, str, str]] = {
299
+ (e.timestamp, e.type, e.entity, str(e.entity_id))
300
+ for e in entries
301
+ if e.type in ("add", "delete")
302
+ }
303
+
304
+ result: list[EvolutionEntry] = []
305
+ for entry in entries:
306
+ # Always keep updates
307
+ if entry.type == "update":
308
+ result.append(entry)
309
+ continue
310
+
311
+ # Keep entries without parent relation
312
+ if not entry.parent_entity or entry.parent_entity_id is None:
313
+ result.append(entry)
314
+ continue
315
+
316
+ # Check if parent has the same operation in this batch
317
+ parent_key = (
318
+ entry.timestamp,
319
+ entry.type,
320
+ entry.parent_entity,
321
+ str(entry.parent_entity_id),
322
+ )
323
+ if parent_key not in parent_ops:
324
+ result.append(entry)
325
+
326
+ return result
327
+
328
+
244
329
  def load_evolution(path: Path, xlsx_path: Path | None = None) -> list[EvolutionEntry]:
245
330
  """Load existing evolution entries.
246
331
 
@@ -264,6 +349,7 @@ def load_evolution(path: Path, xlsx_path: Path | None = None) -> list[EvolutionE
264
349
  entity=row["entity"],
265
350
  entity_id=row["entity_id"],
266
351
  parent_entity_id=row.get("parent_entity_id"),
352
+ parent_entity=row.get("parent_entity"),
267
353
  variable=row.get("variable"),
268
354
  old_value=row.get("old_value"),
269
355
  new_value=row.get("new_value"),
@@ -303,10 +389,11 @@ def load_evolution_xlsx(xlsx_path: Path) -> list[EvolutionEntry]:
303
389
  entity=str(row[2]) if row[2] else "",
304
390
  entity_id=str(row[3]) if row[3] else "",
305
391
  parent_entity_id=str(row[4]) if row[4] else None,
306
- variable=str(row[5]) if row[5] else None,
307
- old_value=row[6] if row[6] else None,
308
- new_value=row[7] if row[7] else None,
309
- name=str(row[8]) if row[8] else None,
392
+ parent_entity=str(row[5]) if row[5] else None,
393
+ variable=str(row[6]) if row[6] else None,
394
+ old_value=row[7] if row[7] else None,
395
+ new_value=row[8] if row[8] else None,
396
+ name=str(row[9]) if len(row) > 9 and row[9] else None,
310
397
  )
311
398
  )
312
399
  return entries
@@ -343,6 +430,7 @@ def save_evolution(
343
430
  "entity",
344
431
  "entity_id",
345
432
  "parent_entity_id",
433
+ "parent_entity",
346
434
  "variable",
347
435
  "old_value",
348
436
  "new_value",
@@ -356,6 +444,7 @@ def save_evolution(
356
444
  entry.entity,
357
445
  entry.entity_id,
358
446
  entry.parent_entity_id,
447
+ entry.parent_entity,
359
448
  entry.variable,
360
449
  entry.old_value,
361
450
  entry.new_value,
@@ -388,6 +477,7 @@ def write_evolution_xlsx(entries: list[EvolutionEntry], xlsx_path: Path) -> None
388
477
  "entity",
389
478
  "entity_id",
390
479
  "parent_entity_id",
480
+ "parent_entity",
391
481
  "variable",
392
482
  "old_value",
393
483
  "new_value",
@@ -404,6 +494,7 @@ def write_evolution_xlsx(entries: list[EvolutionEntry], xlsx_path: Path) -> None
404
494
  entry.entity,
405
495
  str(entry.entity_id) if entry.entity_id is not None else "",
406
496
  str(entry.parent_entity_id) if entry.parent_entity_id else "",
497
+ str(entry.parent_entity) if entry.parent_entity else "",
407
498
  str(entry.variable) if entry.variable else "",
408
499
  str(entry.old_value) if entry.old_value is not None else "",
409
500
  str(entry.new_value) if entry.new_value is not None else "",
File without changes
File without changes
File without changes
File without changes
File without changes