jsonjsdb 0.7.3__tar.gz → 0.8.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: jsonjsdb
3
- Version: 0.7.3
3
+ Version: 0.8.0
4
4
  Summary: Python library for JSONJS database loading
5
5
  Project-URL: Homepage, https://github.com/datannur/jsonjsdb
6
6
  Project-URL: Repository, https://github.com/datannur/jsonjsdb
@@ -172,6 +172,9 @@ db.save()
172
172
  # Disable tracking
173
173
  db.save(track_evolution=False)
174
174
 
175
+ # Skip .json.js files (faster, smaller output)
176
+ db.save(write_js=False)
177
+
175
178
  # Use Excel as source (for easy editing of logs)
176
179
  db.save(evolution_xlsx=Path("path/to/evolution.xlsx"))
177
180
 
@@ -179,6 +182,27 @@ db.save(evolution_xlsx=Path("path/to/evolution.xlsx"))
179
182
  db.save(timestamp=1741186800)
180
183
  ```
181
184
 
185
+ #### Cascade Filtering
186
+
187
+ When a parent entity is added or deleted, all child entities are also added/deleted.
188
+ By default, this creates noise in the evolution log. Use `parent_relations` to automatically
189
+ filter out cascade entries:
190
+
191
+ ```python
192
+ db.save(
193
+ parent_relations={
194
+ "variable": "dataset", # variable.dataset_id → dataset
195
+ "freq": "variable", # freq.variable_id → variable
196
+ }
197
+ )
198
+ ```
199
+
200
+ With cascade filtering:
201
+ - Adding a dataset with 50 variables logs only 1 entry (the dataset add)
202
+ - Deleting a dataset logs only the parent delete, not all child deletes
203
+ - Updates are always logged (no filtering)
204
+ - Explicit child additions (to existing parent) are still logged
205
+
182
206
  When `evolution_xlsx` is provided:
183
207
  - The xlsx file becomes the source of truth (read from xlsx if it exists)
184
208
  - User edits made in Excel are preserved on subsequent saves
@@ -193,6 +217,7 @@ Evolution format:
193
217
  "entity": "user",
194
218
  "entity_id": "user_2",
195
219
  "parent_entity_id": null,
220
+ "parent_entity": null,
196
221
  "variable": null,
197
222
  "old_value": null,
198
223
  "new_value": null,
@@ -201,12 +226,13 @@ Evolution format:
201
226
  {
202
227
  "timestamp": 1741186800,
203
228
  "type": "update",
204
- "entity": "user",
205
- "entity_id": "user_1",
206
- "parent_entity_id": null,
207
- "variable": "score",
208
- "old_value": 100,
209
- "new_value": 200,
229
+ "entity": "variable",
230
+ "entity_id": "var_1",
231
+ "parent_entity_id": "ds_1",
232
+ "parent_entity": "dataset",
233
+ "variable": "name",
234
+ "old_value": "Old Name",
235
+ "new_value": "New Name",
210
236
  "name": null
211
237
  }
212
238
  ]
@@ -147,6 +147,9 @@ db.save()
147
147
  # Disable tracking
148
148
  db.save(track_evolution=False)
149
149
 
150
+ # Skip .json.js files (faster, smaller output)
151
+ db.save(write_js=False)
152
+
150
153
  # Use Excel as source (for easy editing of logs)
151
154
  db.save(evolution_xlsx=Path("path/to/evolution.xlsx"))
152
155
 
@@ -154,6 +157,27 @@ db.save(evolution_xlsx=Path("path/to/evolution.xlsx"))
154
157
  db.save(timestamp=1741186800)
155
158
  ```
156
159
 
160
+ #### Cascade Filtering
161
+
162
+ When a parent entity is added or deleted, all child entities are also added/deleted.
163
+ By default, this creates noise in the evolution log. Use `parent_relations` to automatically
164
+ filter out cascade entries:
165
+
166
+ ```python
167
+ db.save(
168
+ parent_relations={
169
+ "variable": "dataset", # variable.dataset_id → dataset
170
+ "freq": "variable", # freq.variable_id → variable
171
+ }
172
+ )
173
+ ```
174
+
175
+ With cascade filtering:
176
+ - Adding a dataset with 50 variables logs only 1 entry (the dataset add)
177
+ - Deleting a dataset logs only the parent delete, not all child deletes
178
+ - Updates are always logged (no filtering)
179
+ - Explicit child additions (to existing parent) are still logged
180
+
157
181
  When `evolution_xlsx` is provided:
158
182
  - The xlsx file becomes the source of truth (read from xlsx if it exists)
159
183
  - User edits made in Excel are preserved on subsequent saves
@@ -168,6 +192,7 @@ Evolution format:
168
192
  "entity": "user",
169
193
  "entity_id": "user_2",
170
194
  "parent_entity_id": null,
195
+ "parent_entity": null,
171
196
  "variable": null,
172
197
  "old_value": null,
173
198
  "new_value": null,
@@ -176,12 +201,13 @@ Evolution format:
176
201
  {
177
202
  "timestamp": 1741186800,
178
203
  "type": "update",
179
- "entity": "user",
180
- "entity_id": "user_1",
181
- "parent_entity_id": null,
182
- "variable": "score",
183
- "old_value": 100,
184
- "new_value": 200,
204
+ "entity": "variable",
205
+ "entity_id": "var_1",
206
+ "parent_entity_id": "ds_1",
207
+ "parent_entity": "dataset",
208
+ "variable": "name",
209
+ "old_value": "Old Name",
210
+ "new_value": "New Name",
185
211
  "name": null
186
212
  }
187
213
  ]
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "jsonjsdb"
7
- version = "0.7.3"
7
+ version = "0.8.0"
8
8
  description = "Python library for JSONJS database loading"
9
9
  authors = [{ name = "datannur" }]
10
10
  readme = "README.md"
@@ -10,6 +10,7 @@ import polars as pl
10
10
  from .evolution import (
11
11
  EvolutionEntry,
12
12
  compare_datasets,
13
+ filter_cascade_entries,
13
14
  get_timestamp,
14
15
  load_evolution,
15
16
  save_evolution,
@@ -107,6 +108,8 @@ class Jsonjsdb:
107
108
  track_evolution: bool = True,
108
109
  evolution_xlsx: Path | str | None = None,
109
110
  timestamp: int | None = None,
111
+ write_js: bool = True,
112
+ parent_relations: dict[str, str] | None = None,
110
113
  ) -> None:
111
114
  """Save all tables to disk with optional evolution tracking.
112
115
 
@@ -118,6 +121,9 @@ class Jsonjsdb:
118
121
  track_evolution: Enable change tracking (default: True)
119
122
  evolution_xlsx: Optional path for evolution.xlsx output
120
123
  timestamp: Optional timestamp override for deterministic outputs
124
+ write_js: If True, write both .json and .json.js (default: True)
125
+ parent_relations: Child->parent table mapping for cascade filtering
126
+ Example: {"variable": "dataset", "freq": "variable"}
121
127
  """
122
128
  save_path = Path(path) if path else self._path
123
129
 
@@ -145,26 +151,35 @@ class Jsonjsdb:
145
151
  # Track evolution if enabled
146
152
  if track_evolution:
147
153
  old_df = self._get_old_table(save_path, name, same_path)
148
- entries = compare_datasets(old_df, persistable_df, ts, name)
154
+ entries = compare_datasets(
155
+ old_df, persistable_df, ts, name, parent_relations
156
+ )
149
157
  new_entries.extend(entries)
150
158
 
151
159
  write_table_json(persistable_df, save_path / f"{name}.json")
152
- write_table_jsonjs(persistable_df, name, save_path / f"{name}.json.js")
160
+ if write_js:
161
+ write_table_jsonjs(persistable_df, name, save_path / f"{name}.json.js")
153
162
  table_names.append(name)
154
163
 
155
164
  # Update snapshot for next comparison
156
165
  self._original_snapshots[name] = persistable_df.clone()
157
166
 
158
167
  # Save evolution if there are new entries
168
+ if track_evolution and new_entries:
169
+ # Filter cascade entries (child add/delete when parent has same operation)
170
+ new_entries = filter_cascade_entries(new_entries)
171
+
159
172
  if track_evolution and new_entries:
160
173
  xlsx_path = Path(evolution_xlsx) if evolution_xlsx else None
161
174
  existing_entries = load_evolution(save_path, xlsx_path)
162
175
  all_entries = existing_entries + new_entries
163
176
  save_evolution(all_entries, save_path, xlsx_path)
164
- if "evolution" not in table_names:
177
+ if "evolution" not in table_names: # pragma: no branch
165
178
  table_names.append("evolution")
166
179
 
167
- write_table_index(table_names, save_path / "__table__.json", ts)
180
+ write_table_index(
181
+ table_names, save_path / "__table__.json", ts, write_js=write_js
182
+ )
168
183
 
169
184
  self._path = save_path
170
185
 
@@ -27,6 +27,7 @@ class EvolutionEntry:
27
27
  entity: str
28
28
  entity_id: str | int
29
29
  parent_entity_id: str | int | None
30
+ parent_entity: str | None
30
31
  variable: str | None
31
32
  old_value: Any
32
33
  new_value: Any
@@ -45,13 +46,40 @@ def _standardize_id(id_value: str) -> str:
45
46
  return INVALID_ID_PATTERN.sub("", trimmed)
46
47
 
47
48
 
48
- def _get_first_parent_id(row: dict[str, Any]) -> str | int | None:
49
- """Find the first foreign key column (_id or Id suffix) and return its value."""
49
+ def _get_parent_info(
50
+ row: dict[str, Any],
51
+ entity: str,
52
+ parent_relations: dict[str, str] | None,
53
+ ) -> tuple[str | None, str | int | None]:
54
+ """Get parent entity and id based on config or FK convention.
55
+
56
+ Args:
57
+ row: Row data
58
+ entity: Current entity/table name
59
+ parent_relations: Mapping of child_table -> parent_table
60
+
61
+ Returns:
62
+ Tuple of (parent_entity, parent_entity_id)
63
+ """
64
+ if parent_relations and entity in parent_relations:
65
+ parent_entity = parent_relations[entity]
66
+ fk_col = f"{parent_entity}_id"
67
+ parent_id = row.get(fk_col)
68
+ if isinstance(parent_id, (str, int)):
69
+ return (parent_entity, parent_id)
70
+ return (parent_entity, None)
71
+
72
+ # Fallback: auto-detect from first FK column
50
73
  for key, value in row.items():
51
- if key.endswith("_id") or key.endswith("Id"):
74
+ if key.endswith("_id") and key != "id":
75
+ parent_entity = key[:-3] # strip "_id"
52
76
  if isinstance(value, (str, int)):
53
- return value
54
- return None
77
+ return (parent_entity, value)
78
+ elif key.endswith("Id"):
79
+ parent_entity = key[:-2] # strip "Id"
80
+ if isinstance(value, (str, int)):
81
+ return (parent_entity, value)
82
+ return (None, None)
55
83
 
56
84
 
57
85
  def _add_composite_id_if_missing(df: pl.DataFrame) -> tuple[pl.DataFrame, bool]:
@@ -109,6 +137,7 @@ def compare_datasets(
109
137
  new_df: pl.DataFrame,
110
138
  timestamp: int,
111
139
  entity: str,
140
+ parent_relations: dict[str, str] | None = None,
112
141
  ) -> list[EvolutionEntry]:
113
142
  """Compare two datasets and return list of evolution entries.
114
143
 
@@ -117,6 +146,7 @@ def compare_datasets(
117
146
  new_df: New version of the data
118
147
  timestamp: Unix timestamp in seconds
119
148
  entity: Table/entity name
149
+ parent_relations: Mapping of child_table -> parent_table for cascade filtering
120
150
 
121
151
  Returns:
122
152
  List of EvolutionEntry objects describing the changes
@@ -130,8 +160,6 @@ def compare_datasets(
130
160
  if old_df.is_empty():
131
161
  return entries
132
162
 
133
- # Normalize IDs for consistent comparison
134
-
135
163
  # Normalize id columns to string for consistent comparison
136
164
  old_df = _normalize_id_column(old_df)
137
165
  new_df = _normalize_id_column(new_df)
@@ -144,9 +172,7 @@ def compare_datasets(
144
172
  map_new = _df_to_dict_by_id(new_df)
145
173
 
146
174
  # Determine all variables to compare
147
- if old_df.is_empty():
148
- variables = new_df.columns
149
- elif new_df.is_empty():
175
+ if new_df.is_empty():
150
176
  variables = old_df.columns
151
177
  else:
152
178
  variables = list(set(old_df.columns) | set(new_df.columns))
@@ -176,6 +202,9 @@ def compare_datasets(
176
202
  if _values_are_empty(old_value, new_value):
177
203
  continue
178
204
 
205
+ parent_entity, parent_id = _get_parent_info(
206
+ obj_new, entity, parent_relations
207
+ )
179
208
  entries.append(
180
209
  EvolutionEntry(
181
210
  timestamp=timestamp,
@@ -187,8 +216,11 @@ def compare_datasets(
187
216
  else entity_id
188
217
  ),
189
218
  parent_entity_id=(
190
- str(entity_id).split("---")[0] if has_composite_id else None
219
+ str(entity_id).split("---")[0]
220
+ if has_composite_id
221
+ else parent_id
191
222
  ),
223
+ parent_entity=None if has_composite_id else parent_entity,
192
224
  variable=variable,
193
225
  old_value=old_value,
194
226
  new_value=new_value,
@@ -199,6 +231,7 @@ def compare_datasets(
199
231
  # Detect additions
200
232
  for entity_id in ids_added:
201
233
  obj_new = map_new[entity_id]
234
+ parent_entity, parent_id = _get_parent_info(obj_new, entity, parent_relations)
202
235
  entries.append(
203
236
  EvolutionEntry(
204
237
  timestamp=timestamp,
@@ -208,8 +241,9 @@ def compare_datasets(
208
241
  _standardize_id(str(entity_id)) if has_composite_id else entity_id
209
242
  ),
210
243
  parent_entity_id=(
211
- str(entity_id).split("---")[0] if has_composite_id else None
244
+ str(entity_id).split("---")[0] if has_composite_id else parent_id
212
245
  ),
246
+ parent_entity=None if has_composite_id else parent_entity,
213
247
  variable=None,
214
248
  old_value=None,
215
249
  new_value=None,
@@ -220,6 +254,7 @@ def compare_datasets(
220
254
  # Detect deletions
221
255
  for entity_id in ids_removed:
222
256
  obj_old = map_old[entity_id]
257
+ parent_entity, parent_id = _get_parent_info(obj_old, entity, parent_relations)
223
258
  entries.append(
224
259
  EvolutionEntry(
225
260
  timestamp=timestamp,
@@ -228,7 +263,10 @@ def compare_datasets(
228
263
  entity_id=(
229
264
  _standardize_id(str(entity_id)) if has_composite_id else entity_id
230
265
  ),
231
- parent_entity_id=_get_first_parent_id(obj_old),
266
+ parent_entity_id=(
267
+ str(entity_id).split("---")[0] if has_composite_id else parent_id
268
+ ),
269
+ parent_entity=None if has_composite_id else parent_entity,
232
270
  variable=None,
233
271
  old_value=None,
234
272
  new_value=None,
@@ -243,6 +281,51 @@ def compare_datasets(
243
281
  return entries
244
282
 
245
283
 
284
def filter_cascade_entries(entries: list[EvolutionEntry]) -> list[EvolutionEntry]:
    """Filter out cascade add/delete entries where the parent has the same operation.

    When a parent entity is added or deleted, its child entities are usually
    added/deleted in the same batch. Such child entries are dropped so that
    only the parent-level change remains. An entry is dropped only when an
    entry for its parent with the same timestamp and the same operation type
    exists in ``entries``.

    Always kept:
      * entries whose type is not "add" or "delete" (e.g. updates),
      * entries without a parent entity or without a parent id,
      * entries that reference themselves as parent; otherwise such an entry
        would match its own key and silently remove itself from the log.

    Args:
        entries: List of evolution entries to filter.

    Returns:
        New list, in the original order, with cascade entries removed.
    """
    cascade_types = ("add", "delete")

    # Index every add/delete in the batch: (timestamp, type, entity, entity_id).
    # Ids are compared as strings so that 1 and "1" denote the same entity.
    parent_ops: set[tuple[int, str, str, str]] = {
        (e.timestamp, e.type, e.entity, str(e.entity_id))
        for e in entries
        if e.type in cascade_types
    }

    result: list[EvolutionEntry] = []
    for entry in entries:
        # Only add/delete can be part of a cascade; everything else is kept.
        if entry.type not in cascade_types:
            result.append(entry)
            continue

        # Keep entries without a parent relation.
        if not entry.parent_entity or entry.parent_entity_id is None:
            result.append(entry)
            continue

        parent_key = (
            entry.timestamp,
            entry.type,
            entry.parent_entity,
            str(entry.parent_entity_id),
        )
        own_key = (entry.timestamp, entry.type, entry.entity, str(entry.entity_id))

        # A self-referencing row is not a cascade of anything: keep it.
        if parent_key == own_key:
            result.append(entry)
            continue

        # Keep the entry unless its parent underwent the same operation
        # in this batch.
        if parent_key not in parent_ops:
            result.append(entry)

    return result
327
+
328
+
246
329
  def load_evolution(path: Path, xlsx_path: Path | None = None) -> list[EvolutionEntry]:
247
330
  """Load existing evolution entries.
248
331
 
@@ -266,6 +349,7 @@ def load_evolution(path: Path, xlsx_path: Path | None = None) -> list[EvolutionE
266
349
  entity=row["entity"],
267
350
  entity_id=row["entity_id"],
268
351
  parent_entity_id=row.get("parent_entity_id"),
352
+ parent_entity=row.get("parent_entity"),
269
353
  variable=row.get("variable"),
270
354
  old_value=row.get("old_value"),
271
355
  new_value=row.get("new_value"),
@@ -305,10 +389,11 @@ def load_evolution_xlsx(xlsx_path: Path) -> list[EvolutionEntry]:
305
389
  entity=str(row[2]) if row[2] else "",
306
390
  entity_id=str(row[3]) if row[3] else "",
307
391
  parent_entity_id=str(row[4]) if row[4] else None,
308
- variable=str(row[5]) if row[5] else None,
309
- old_value=row[6] if row[6] else None,
310
- new_value=row[7] if row[7] else None,
311
- name=str(row[8]) if row[8] else None,
392
+ parent_entity=str(row[5]) if row[5] else None,
393
+ variable=str(row[6]) if row[6] else None,
394
+ old_value=row[7] if row[7] else None,
395
+ new_value=row[8] if row[8] else None,
396
+ name=str(row[9]) if len(row) > 9 and row[9] else None,
312
397
  )
313
398
  )
314
399
  return entries
@@ -345,6 +430,7 @@ def save_evolution(
345
430
  "entity",
346
431
  "entity_id",
347
432
  "parent_entity_id",
433
+ "parent_entity",
348
434
  "variable",
349
435
  "old_value",
350
436
  "new_value",
@@ -358,6 +444,7 @@ def save_evolution(
358
444
  entry.entity,
359
445
  entry.entity_id,
360
446
  entry.parent_entity_id,
447
+ entry.parent_entity,
361
448
  entry.variable,
362
449
  entry.old_value,
363
450
  entry.new_value,
@@ -390,6 +477,7 @@ def write_evolution_xlsx(entries: list[EvolutionEntry], xlsx_path: Path) -> None
390
477
  "entity",
391
478
  "entity_id",
392
479
  "parent_entity_id",
480
+ "parent_entity",
393
481
  "variable",
394
482
  "old_value",
395
483
  "new_value",
@@ -406,6 +494,7 @@ def write_evolution_xlsx(entries: list[EvolutionEntry], xlsx_path: Path) -> None
406
494
  entry.entity,
407
495
  str(entry.entity_id) if entry.entity_id is not None else "",
408
496
  str(entry.parent_entity_id) if entry.parent_entity_id else "",
497
+ str(entry.parent_entity) if entry.parent_entity else "",
409
498
  str(entry.variable) if entry.variable else "",
410
499
  str(entry.old_value) if entry.old_value is not None else "",
411
500
  str(entry.new_value) if entry.new_value is not None else "",
@@ -34,21 +34,26 @@ def write_table_jsonjs(df: pl.DataFrame, table_name: str, path: Path) -> None:
34
34
 
35
35
 
36
36
def write_table_index(
    tables: list[str],
    path: Path,
    timestamp: Optional[int] = None,
    *,
    write_js: bool = True,
) -> None:
    """Write the table index as JSON and, optionally, as a .json.js companion.

    One row is produced per table name, in sorted order, each carrying the
    same ``last_modif`` value. The rows are written through the same table
    writers used for regular tables.

    Args:
        tables: Table names to list in the index.
        path: Destination of the JSON index file (``__table__.json``).
        timestamp: Value stored as ``last_modif``; the current Unix time in
            whole seconds is used when None.
        write_js: When True, also write the ``.json.js`` variant; its path is
            ``path`` with the final suffix replaced by ``.json.js``.
    """
    last_modif = int(time.time()) if timestamp is None else timestamp

    rows = [{"name": table, "last_modif": last_modif} for table in sorted(tables)]
    index_df = pl.DataFrame(rows)

    write_table_json(index_df, path)
    if not write_js:
        return
    write_table_jsonjs(index_df, "__table__", path.with_suffix(".json.js"))
52
57
 
53
58
 
54
59
  def _prepare_df_for_write(df: pl.DataFrame) -> pl.DataFrame:
File without changes
File without changes
File without changes
File without changes
File without changes