ggblab 0.9.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (26):
  1. ggblab/__init__.py +44 -0
  2. ggblab/_version.py +4 -0
  3. ggblab/comm.py +243 -0
  4. ggblab/construction.py +179 -0
  5. ggblab/errors.py +142 -0
  6. ggblab/ggbapplet.py +293 -0
  7. ggblab/parser.py +486 -0
  8. ggblab/persistent_counter.py +175 -0
  9. ggblab/schema.py +114 -0
  10. ggblab/utils.py +109 -0
  11. ggblab-0.9.3.data/data/share/jupyter/labextensions/ggblab/build_log.json +730 -0
  12. ggblab-0.9.3.data/data/share/jupyter/labextensions/ggblab/install.json +5 -0
  13. ggblab-0.9.3.data/data/share/jupyter/labextensions/ggblab/package.json +210 -0
  14. ggblab-0.9.3.data/data/share/jupyter/labextensions/ggblab/schemas/ggblab/package.json.orig +205 -0
  15. ggblab-0.9.3.data/data/share/jupyter/labextensions/ggblab/schemas/ggblab/plugin.json +8 -0
  16. ggblab-0.9.3.data/data/share/jupyter/labextensions/ggblab/static/lib_index_js.bbfa36bc62ee08eb62b2.js +465 -0
  17. ggblab-0.9.3.data/data/share/jupyter/labextensions/ggblab/static/lib_index_js.bbfa36bc62ee08eb62b2.js.map +1 -0
  18. ggblab-0.9.3.data/data/share/jupyter/labextensions/ggblab/static/remoteEntry.2d29364aef8b527d773e.js +568 -0
  19. ggblab-0.9.3.data/data/share/jupyter/labextensions/ggblab/static/remoteEntry.2d29364aef8b527d773e.js.map +1 -0
  20. ggblab-0.9.3.data/data/share/jupyter/labextensions/ggblab/static/style.js +4 -0
  21. ggblab-0.9.3.data/data/share/jupyter/labextensions/ggblab/static/style_index_js.aab9f5416f41ce79cac3.js +492 -0
  22. ggblab-0.9.3.data/data/share/jupyter/labextensions/ggblab/static/style_index_js.aab9f5416f41ce79cac3.js.map +1 -0
  23. ggblab-0.9.3.dist-info/METADATA +768 -0
  24. ggblab-0.9.3.dist-info/RECORD +26 -0
  25. ggblab-0.9.3.dist-info/WHEEL +4 -0
  26. ggblab-0.9.3.dist-info/licenses/LICENSE +29 -0
ggblab/parser.py ADDED
@@ -0,0 +1,486 @@
1
+ import re
2
+ import polars as pl
3
+ import networkx as nx
4
+ from itertools import combinations, chain
5
+ from .persistent_counter import PersistentCounter
6
+ from .utils import flatten
7
+
8
+
9
class ggb_parser:
    """Dependency graph parser for GeoGebra constructions.

    Analyzes object relationships in GeoGebra constructions by building
    directed graphs using NetworkX. Provides two graph representations:

    - G (full dependency graph): Complete construction dependencies
    - G2 (simplified subgraph): Minimal construction sequences (DEPRECATED)

    The parse() method builds the forward/backward dependency graph (G).
    The parse_subgraph() method attempts minimal extraction but has critical
    performance limitations (see method docstring and ARCHITECTURE.md).

    Command learning:
    - Automatically extracts and caches GeoGebra commands from construction protocols
    - Persists command names to a shelve database for cross-project learning
    - Supports enable/disable of persistence via cache_enabled flag

    Attributes:
        df (polars.DataFrame): Construction protocol dataframe
        G (nx.DiGraph): Full dependency graph
        G2 (nx.DiGraph): Simplified subgraph (from parse_subgraph)
        roots (list): Objects with no dependencies (in-degree = 0)
        leaves (list): Terminal objects (out-degree = 0)
        rd (dict): Reverse mapping from object name to DataFrame row number
        ft (dict): Tokenized function definitions, flattened
        command_cache (PersistentCounter): Persistent command counter backed by
            a shelve database (persistence toggled via its ``enabled`` flag;
            there is no separate ``cache_enabled`` attribute on the parser).

    Example:
        >>> parser = ggb_parser()
        >>> parser.df = construction_dataframe
        >>> parser.parse()
        >>> print(parser.roots)  # Independent objects
        >>> print(parser.leaves)  # Terminal constructions
        >>> commands = parser.get_known_commands()  # Retrieved cached commands

    See:
        docs/architecture.md § Dependency Parser Architecture
    """
    # NOTE(review): executed at class-definition (import) time — mutates
    # polars' process-wide display configuration as a side effect.
    pl.Config.set_tbl_rows(-1)
    # Column names assigned when transposing the raw protocol dataframe.
    COLUMNS = ["Type", "Command", "Value", "Caption", "Layer"]
    # Object types whose commands are tokenized into self.ft by parse().
    SHAPES = ["point", "segment", "vector", "ray", "line", "circle", "polygon", "triangle", "quadrilateral"]

    def __init__(self, cache_path=None, cache_enabled=True):
        """Initialize the parser with optional command caching.

        Args:
            cache_path (str, optional): Path to shelve database for command persistence.
                Defaults to '.ggblab_command_cache' in current directory.
            cache_enabled (bool): Enable automatic persistence of discovered commands.
                Default: True
        """
        cache_path = cache_path or '.ggblab_command_cache'
        self.command_cache = PersistentCounter(cache_path=cache_path, enabled=cache_enabled)
64
+
65
+ def parse(self):
66
+ """Build the full dependency graph (G) from construction protocol.
67
+
68
+ Analyzes the construction dataframe (self.df) and builds:
69
+ - Forward dependencies: Object A depends on B (B → A edge)
70
+ - Backward dependencies: Object A is used by B (A → B edge)
71
+
72
+ The graph nodes are GeoGebra object names; edges represent dependencies.
73
+
74
+ Attributes set:
75
+ - self.G: NetworkX DiGraph of dependencies
76
+ - self.roots: Objects with no dependencies (starting points)
77
+ - self.leaves: Objects with no dependents (endpoints)
78
+ - self.rd: Reverse dict (name → DataFrame row index)
79
+ - self.ft: Tokenized function calls for each object
80
+
81
+ Also extracts and persists command names if caching is enabled.
82
+
83
+ Example:
84
+ >>> parser.df = polars.DataFrame(construction_protocol)
85
+ >>> parser.parse()
86
+ >>> print(list(parser.G.edges())) # [(A, B), (B, C), ...]
87
+ """
88
+ # reverse dict from name to row number of dataframe
89
+ self.rd = {v: k for k, v in enumerate(self.df["Name"])}
90
+
91
+ # tokenized function, flattened
92
+ self.ft = {n: list([e for e in flatten(self.tokenize_with_commas(c)) if e != ','])
93
+ for n, c in self.df.filter(pl.col("Type").is_in(self.SHAPES)).select(["Name", "Command"]).iter_rows()}
94
+
95
+ # Extract and cache command names from all commands in the dataframe
96
+ for command_str in self.df["Command"]:
97
+ if command_str:
98
+ result = self.tokenize_with_commas(command_str, extract_commands=True)
99
+ self.command_cache.increment(result['commands'])
100
+
101
+ # graph in forward/backward dependency
102
+ # self.graph = {k: self.ffd(k) for k in self.df.filter(pl.col("Type") != "text")["Name"]}
103
+ # self.rgraph = {k: self.fbd(k) for k in self.ft}
104
+
105
+ self.G = nx.DiGraph()
106
+ self.G.clear()
107
+
108
+ for n in self.ft:
109
+ for o in self.ft[n]:
110
+ if o in self.rd:
111
+ # print(n, o)
112
+ self.G.add_edge(o, n)
113
+ for o in self.fbd(n):
114
+ # print(o, ggb.ft[o])
115
+ if n in self.ft[o]:
116
+ # print(o, n)
117
+ self.G.add_edge(n, o)
118
+
119
+ self.roots = [v for v, d in self.G.in_degree() if d == 0]
120
+ self.leaves = [v for v, d in self.G.out_degree() if d == 0]
121
+
122
    def parse_subgraph(self):
        """
        Extract a simplified dependency subgraph (G2) from the full graph (G).

        WARNING: This implementation has significant performance limitations and
        should be replaced in v1.0. See ARCHITECTURE.md for details.

        Algorithm:
        - Enumerates all combinations of root objects (O(2^n) combinations)
        - For each combination, identifies dependent objects that exclusively depend on that combination
        - Adds edges to G2 when dependencies are uniquely determined

        KNOWN LIMITATIONS (Critical):
        1. **Combinatorial Explosion**: O(2^n) time complexity where n = number of root objects.
           - With 15 roots: ~32,000 paths (manageable)
           - With 20 roots: ~1,000,000 paths (slow)
           - With 25+ roots: computation becomes intractable

        2. **Infinite Loop Risk**: The while loop may not terminate under certain graph topologies
           where _nodes1 is not updated in each iteration.

        3. **Limited N-ary Dependency Support**: Only handles 1-2 parents. Constructions where
           3+ objects jointly create one output (e.g., polygon from 3+ points) have incomplete
           representation in G2 (these edges are silently skipped).

        4. **Redundant Computation**: Neighbor lists are recomputed on every iteration
           of inner loops, causing O(n) redundant work.

        5. **Debug Output**: Contains print() statements that should be removed for production.

        WORKAROUND:
        - Use with constructions having <15 independent root objects
        - For larger constructions, consider implementing the optimized algorithm
          described in ARCHITECTURE.md § Dependency Parser Architecture

        FUTURE: Replace with topological sort + reachability pruning in v1.0 for O(n(n+m)) complexity.

        See: https://github.com/[repo]/ARCHITECTURE.md#dependency-parser-architecture
        """
        self.G2 = nx.DiGraph()
        self.G2.clear()

        # _nodes0: roots already consumed; _nodes1: frontier of unprocessed roots.
        _nodes0 = set()
        _nodes1 = {n for n in self.roots if n in self.ft}  # set(['C', 'A'])

        while _nodes1:
            # print(f"path: {_nodes0} {_nodes1}")

            # All non-empty subsets of the frontier, each unioned with the
            # consumed set — this is the O(2^n) enumeration noted above.
            _paths = []
            for __p in (list(chain.from_iterable(combinations(_nodes1, r)
                        for r in range(1, len(_nodes1) + 1)))):
                _paths.append(_nodes0 | set(__p))

            for _nodes2 in _paths:
                # _nodes2 = set(__p)
                # print(f"to: {_nodes2 - _nodes0}")

                _nodes3 = set()
                # NOTE(review): the loop variable n1 here is never read — the
                # body depends only on _nodes2, so this outer loop repeats
                # identical work len(_nodes2) times (limitation #4), and n1 is
                # immediately shadowed by the sorted() loop below.
                for n1 in _nodes2:
                    _n = [set(self.G.neighbors(__n)) for __n in _nodes2]
                    # print(set().union(*_n))

                    for n0 in set().union(*_n):
                        # print(f"{n0} {ggb.ft[n0]}")
                        d = {n: nx.descendants(self.G, n) for n in self.G.neighbors(n0)}
                        for n1 in sorted(d.keys(), key=lambda e: len(d[e]), reverse=True):
                            # n0 qualifies when all of its ancestors lie inside
                            # the candidate combination (plus n1 itself).
                            # if len(d[n1]) and not ggb.fbd(n0) - (_nodes2 | {n1}):
                            if len(d[n1]) and not nx.ancestors(self.G, n0) - (_nodes2 | {n1}):
                                _nodes3 |= {n0}

                for n in _nodes3 - _nodes2 - _nodes1:
                    # Only 1- or 2-parent productions are recorded (limitation #3).
                    match len(_nodes2 - _nodes0):
                        case 1:
                            o, = tuple(_nodes2 - _nodes0)
                            print(f"found: '{o}' => '{n}'")
                            self.G2.add_edge(o, n)
                        case 2:
                            o1, o2, = tuple(_nodes2 - _nodes0)
                            # Skip if either parent already reaches n in G2.
                            if o1 in self.G2 and n in self.G2.neighbors(o1):
                                pass
                            elif o2 in self.G2 and n in self.G2.neighbors(o2):
                                pass
                            else:
                                print(f"found: '{o1}', '{o2}' => '{n}'")
                                self.G2.add_edge(o1, n)
                                self.G2.add_edge(o2, n)
                        case _:
                            pass

            # NOTE(review): _nodes3/_nodes2 here hold the values from the LAST
            # _paths iteration only — if that subset discovers nothing new,
            # _nodes1 can fail to shrink (limitation #2).
            _nodes0 |= _nodes1
            _nodes1 = _nodes3 - _nodes2 - _nodes1
213
+
214
+ # def parse_subgraph_improved(self):
215
+ # """
216
+ # Identify minimal construction sequences by analyzing the dependency graph.
217
+ # Uses a topological sort + pruning approach instead of exhaustive path enumeration.
218
+ # """
219
+ # self.G2 = nx.DiGraph()
220
+
221
+ # # Identify which nodes are essential (no alternative path)
222
+ # for node in self.G.nodes():
223
+ # direct_parents = list(self.G.predecessors(node))
224
+ # if not direct_parents:
225
+ # continue
226
+
227
+ # # Check if all direct parents are needed
228
+ # # A parent is needed if removing it disconnects node from any root
229
+ # parents_to_keep = []
230
+ # for parent in direct_parents:
231
+ # # Check if there's an alternative path without this parent
232
+ # G_without = self.G.copy()
233
+ # G_without.remove_edge(parent, node)
234
+ # has_alternative = nx.has_path(G_without, parent, node)
235
+
236
+ # if not has_alternative:
237
+ # parents_to_keep.append(parent)
238
+
239
+ # # Add edges for essential parents
240
+ # for parent in parents_to_keep:
241
+ # self.G2.add_edge(parent, node)
242
+
243
+ def ffd(self, k, recursive=True):
244
+ if recursive:
245
+ def _ffd(k):
246
+ if k in self.ft:
247
+ # regular polygon contain not much dependency (includes new vertices and auxiliary edges)
248
+ # return [[e, _ffd(e)] for e in ft if k in (ft[e] + find_returns(k)[1:])]
249
+ return ([[e, _ffd(e)] for e in self.ft if k in self.ft[e]]
250
+ + [[e, _ffd(e)] for e in self.find_returns(k)[1:]])
251
+ else:
252
+ return []
253
+
254
+ return set(flatten(_ffd(k)))
255
+ else:
256
+ return {e for e in self.ft if k in self.ft[e]}
257
+
258
+ def fbd(self, k, recursive=True):
259
+ if recursive:
260
+ def _fbd(k):
261
+ if k in self.ft:
262
+ return [[e, _fbd(e)] for e in self.ft[k] if e in self.ft] + [self.vertex_on_regular_polygon(k)]
263
+ else:
264
+ return []
265
+
266
+ return set(flatten(_fbd(k))) - {k}
267
+ else:
268
+ return {e for e in self.ft[k] if e in self.ft}
269
+
270
+ def initialize_dataframe(self, df=None, file=None):
271
+ if df is not None:
272
+ self.df = df
273
+ elif file is not None:
274
+ self.df = pl.read_parquet(file)
275
+ else:
276
+ raise ValueError("Either df or file must be provided.")
277
+ self.df = (self.df
278
+ .transpose(include_header=True, header_name="Name", column_names=self.COLUMNS)
279
+ .with_columns(pl.col("Layer").cast(pl.Int64).fill_null(0)))
280
+ return self
281
+
282
+ def write_parquet(self, file=None):
283
+ if file is not None:
284
+ self.df.write_parquet(file)
285
+ return self
286
+
287
+ def vertex_on_regular_polygon(self, v):
288
+ try:
289
+ if self.ft[v][0] == "Polygon" and int(self.ft[v][3]):
290
+ return [self.df.filter((pl.col("Command") == self.df[self.rd[v]]["Command"]) & (pl.col("Type") == "polygon"))["Name"].item()]
291
+ except (IndexError, ValueError):
292
+ return []
293
+ else:
294
+ return []
295
+
296
+ def tokenize_with_commas(self, cmd_string, extract_commands=False): # register_expr=False
297
+ """Tokenize a GeoGebra command string into a structured list representation.
298
+
299
+ Parses a mathematical or GeoGebra-like command string and converts it into
300
+ a nested list structure that preserves parentheses, brackets, and commas.
301
+ This is useful for analyzing GeoGebra command syntax and extracting object
302
+ dependencies.
303
+
304
+ === COMMA PRESERVATION AND GEOGEBRA'S IMPLICIT MULTIPLICATION ===
305
+
306
+ This tokenizer preserves commas as explicit tokens for a critical reason:
307
+ GeoGebra outputs commands with implicit multiplication operators omitted.
308
+
309
+ Example:
310
+ Internal representation: Circle(2 * a, b)
311
+ GeoGebra output: Circle(2a, b) <- Information loss!
312
+
313
+ The '*' operator is completely omitted, destroying information. This is a
314
+ one-way transformation: we can't reliably reconstruct "2*a" from "2a" without
315
+ external context (is it "2 times a" or "variable named 2a"?).
316
+
317
+ BUT: GeoGebra ALWAYS uses comma-separation for parameter lists. We exploit
318
+ this invariant. By preserving commas in the token stream, we can:
319
+ 1. Identify parameter boundaries (comma = separator)
320
+ 2. Use whitespace/context to infer where implicit multiplication occurred
321
+
322
+ This is a workaround for GeoGebra's poor design. So the question becomes:
323
+
324
+ - BLAME GeoGebra for being a one-way encoder (lose the *? Why?)
325
+ - PRAISE the developer who recognized the comma-separation invariant
326
+
327
+ Engineering lesson: deal with imperfect systems and find creative solutions.
328
+ GeoGebra didn't help us. We had to be smarter than it.
329
+
330
+ Args:
331
+ cmd_string (str): Input command string (e.g., "Circle(A, Distance(A, B))").
332
+ extract_commands (bool, optional): If True, also extract command name candidates
333
+ (tokens preceding '(' or '['). Returns a dict
334
+ with 'tokens' and 'commands' keys. If False
335
+ (default), returns only the token list for
336
+ backward compatibility. Default: False
337
+ # register_expr (bool, optional): Future feature - if True, replace object references
338
+ # with abstract labels like ${0}, ${1}, etc. based on
339
+ # generation order in the construction protocol.
340
+ # This is useful because GeoGebra applets may rename
341
+ # objects at runtime, but the generation order remains
342
+ # stable within a construction. Not yet implemented.
343
+
344
+ Returns:
345
+ list or dict:
346
+ - If extract_commands=False (default): Nested list structure with tokens.
347
+ Parentheses/brackets create nested lists; commas are preserved as ','.
348
+ - If extract_commands=True: Dict with keys:
349
+ - 'tokens': Nested list structure (as above)
350
+ - 'commands': Set of command name candidates (tokens preceding '(' or '[')
351
+
352
+ Raises:
353
+ ValueError: If parentheses/brackets are mismatched.
354
+
355
+ Examples:
356
+ >>> tokenize_with_commas("Circle(A, 2)")
357
+ ['Circle', ['A', ',', '2']]
358
+
359
+ >>> tokenize_with_commas("Circle(A, 2)", extract_commands=True)
360
+ {'tokens': ['Circle', ['A', ',', '2']], 'commands': {'Circle'}}
361
+
362
+ >>> tokenize_with_commas("Distance(Point(1, 2), B)")
363
+ ['Distance', [['Point', ['1', ',', '2']], ',', 'B']]
364
+
365
+ >>> tokenize_with_commas("Distance(Point(1, 2), B)", extract_commands=True)
366
+ {'tokens': ['Distance', [['Point', ['1', ',', '2']], ',', 'B']], 'commands': {'Distance', 'Point'}}
367
+
368
+ Note:
369
+ Empty or non-string input returns an empty list (or empty dict if
370
+ extract_commands=True) without raising an error.
371
+
372
+ Commas are INTENTIONALLY preserved as tokens to work around GeoGebra's
373
+ implicit multiplication. This is not a quirk; it's the core design decision.
374
+
375
+ Future (register_expr parameter): When implemented, would enable stable object
376
+ references by using construction order indices instead of runtime labels.
377
+ Example output: ['Circle', ['${0}', ',', '${1}']] if register_expr=True
378
+ and the objects were the 0th and 1st in the protocol.
379
+ """
380
+ if not cmd_string or not isinstance(cmd_string, str):
381
+ # raise ValueError("Input must be a non-empty string.")
382
+ if extract_commands:
383
+ return {'tokens': [], 'commands': set()}
384
+ return []
385
+
386
+ # Regex pattern to match (1) parentheses, (2) commas, or (3) any sequence of non-spacing characters.
387
+ tokens = re.findall(r'[()\[\],]|[^()\[\]\s,]+', cmd_string)
388
+
389
+ stack = [[]]
390
+ commands = set() if extract_commands else None
391
+ prev_token = None
392
+
393
+ for token in tokens:
394
+ if token in ['(', '[']:
395
+ # If extracting commands and previous token looks like a command name, save it
396
+ if extract_commands and prev_token and isinstance(prev_token, str) and prev_token[0].isalpha():
397
+ commands.add(prev_token)
398
+ # Begin a new nested list
399
+ new_list = []
400
+ stack[-1].append(new_list)
401
+ stack.append(new_list)
402
+ prev_token = None
403
+ elif token in [')', ']']:
404
+ # Close an active nested list
405
+ if len(stack) > 1:
406
+ stack.pop()
407
+ else:
408
+ raise ValueError("Mismatched parentheses/brackets in input string.")
409
+ prev_token = None
410
+ elif token == ',':
411
+ # Treat commas as tokens
412
+ stack[-1].append(',')
413
+ prev_token = None
414
+ else:
415
+ # Normal token gets added to the current list
416
+ # Future: if register_expr and token in rd:
417
+ # token = f"${rd[token]}" # Replace with abstract order-based label
418
+ stack[-1].append(token)
419
+ prev_token = token
420
+
421
+ if len(stack) != 1:
422
+ raise ValueError("Mismatched parentheses/brackets in input string.")
423
+
424
+ # Auto-cache commands if extract_commands is True
425
+ if extract_commands and commands:
426
+ self.command_cache.increment(commands)
427
+ return {'tokens': stack[0], 'commands': commands}
428
+
429
+ return stack[0]
430
+
431
+ def reconstruct_from_tokens(self, parsed_tokens):
432
+ """Reconstruct the original command string from tokenized structured list.
433
+
434
+ Takes a nested list structure produced by tokenize_with_commas() and
435
+ reconstructs the original command string with proper parentheses, commas,
436
+ and spacing.
437
+
438
+ Args:
439
+ parsed_tokens (list or str): Tokenized structured list, or a single
440
+ token as a string.
441
+
442
+ Returns:
443
+ str: Reconstructed command string matching the original input structure.
444
+
445
+ Raises:
446
+ ValueError: If parsed_tokens contains unexpected types.
447
+
448
+ Examples:
449
+ >>> parser.reconstruct_from_tokens(['Circle', ['A', ',', '2']])
450
+ 'Circle(A, 2)'
451
+
452
+ >>> parser.reconstruct_from_tokens(['Distance', [['Point', ['1', ',', '2']], ',', 'B']])
453
+ 'Distance(Point(1, 2), B)'
454
+
455
+ Note:
456
+ This function is the inverse of tokenize_with_commas(). It handles
457
+ proper spacing around operators and parentheses.
458
+
459
+ The 'register_expr' parameter (commented out) was intended for register expressions,
460
+ where applet-assigned labels could be replaced with construction-order-based
461
+ abstract expressions like '${n}', since GeoGebra may reassign object labels
462
+ but construction order remains stable.
463
+ """
464
+ if isinstance(parsed_tokens, str):
465
+ # If the token is a string, return it directly
466
+ return parsed_tokens
467
+
468
+ elif isinstance(parsed_tokens, list):
469
+ result = []
470
+ for i, token in enumerate(parsed_tokens):
471
+ if isinstance(token, list):
472
+ # For nested lists, recursively reconstruct and wrap in parentheses
473
+ result.append(f"({self.reconstruct_from_tokens(token)})")
474
+ elif token == ',':
475
+ # Append a comma directly
476
+ result.append(',')
477
+ else:
478
+ # For normal tokens, add them to the result list
479
+ result.append(token)
480
+
481
+ # Reconstruct the final string with proper spacing and joining rules
482
+ return re.sub(r'^\- ', '-',
483
+ re.sub(r'([^+\-*/]) \(', r'\1(',
484
+ ' '.join(result).replace(' , ', ', ')))
485
+ else:
486
+ raise ValueError("Unexpected token type in parsed_tokens.")
@@ -0,0 +1,175 @@
1
+ import shelve
2
+
3
+
4
class PersistentCounter:
    """Persistent counter using a shelve database.

    Manages persistent key-value counts across sessions with a dict-like
    interface (iteration, containment, indexing). All storage errors are
    handled best-effort: they are reported via print() warnings and the
    counter degrades to a no-op rather than raising.

    Attributes:
        cache_path (str): Path to the shelve database.
        enabled (bool): Enable/disable persistence.
        _db: shelve.Shelf instance, or None when disabled/unavailable.

    Examples:
        >>> counter = PersistentCounter('my_cache.db')
        >>> counter.increment(['item1', 'item2'])
        >>> 'item1' in counter
        True
        >>> counter['item1']
        1
        >>> for key in counter:
        ...     print(f"{key}: {counter[key]}")
    """
    def __init__(self, cache_path='persistent_counter.db', enabled=True):
        """Initialize the persistent counter.

        Args:
            cache_path (str): Path to shelve database. Default: 'persistent_counter.db'
            enabled (bool): Enable persistence. Default: True
        """
        self.cache_path = cache_path
        self.enabled = enabled
        self._db = None
        if self.enabled:
            self._open()

    def _open(self):
        """Open the shelve database; on failure leave _db as None (disabled)."""
        try:
            self._db = shelve.open(self.cache_path)
        except Exception as e:
            print(f"Warning: Could not open database at {self.cache_path}: {e}")
            self._db = None

    def increment(self, keys):
        """Increment counts for the given keys (falsy keys are skipped).

        Args:
            keys (set or iterable): Keys to increment. A key repeated in the
                iterable is incremented once per occurrence.
        """
        if not self.enabled or self._db is None:
            return

        changed = False
        for key in keys:
            if key:
                try:
                    self._db[key] = self._db.get(key, 0) + 1
                    changed = True
                except Exception as e:
                    print(f"Warning: Could not increment key '{key}': {e}")
        # BUGFIX/perf: sync once per batch instead of once per key — the
        # previous per-key sync() forced a flush on every increment.
        if changed:
            try:
                self._db.sync()
            except Exception as e:
                print(f"Warning: Could not sync database: {e}")

    def get_all(self):
        """Retrieve all stored key-value pairs.

        Returns:
            dict: Keys mapped to their counts (empty when disabled/unavailable).
        """
        if not self.enabled or self._db is None:
            return {}
        try:
            return dict(self._db)
        except Exception as e:
            print(f"Warning: Could not retrieve stored data: {e}")
            return {}

    def clear(self):
        """Clear all stored data."""
        if not self.enabled or self._db is None:
            return
        try:
            self._db.clear()
            self._db.sync()
        except Exception as e:
            print(f"Warning: Could not clear data: {e}")

    def close(self):
        """Close the database. Safe to call repeatedly."""
        if self._db is None:
            return
        try:
            self._db.close()
        except Exception as e:
            print(f"Warning: Could not close database: {e}")
        finally:
            # BUGFIX: always drop the handle, even if close() raised, so the
            # counter cannot keep using a half-closed shelf.
            self._db = None

    def __contains__(self, key):
        """Return True if *key* exists in the counter, else False."""
        if not self.enabled or self._db is None:
            return False
        try:
            return key in self._db
        except Exception:
            return False

    def __iter__(self):
        """Iterate over keys in the counter (empty iterator when disabled)."""
        if not self.enabled or self._db is None:
            return iter([])
        try:
            return iter(self._db)
        except Exception:
            return iter([])

    def __len__(self):
        """Return the number of stored keys (0 when disabled/unavailable)."""
        if not self.enabled or self._db is None:
            return 0
        try:
            return len(self._db)
        except Exception:
            return 0

    def __getitem__(self, key):
        """Get the count for *key*.

        Raises:
            KeyError: If the key does not exist or the store is unavailable.
        """
        if not self.enabled or self._db is None:
            raise KeyError(key)
        try:
            return self._db[key]
        except KeyError:
            raise
        except Exception as e:
            # Normalize storage failures to the documented exception type.
            raise KeyError(f"Error retrieving key '{key}': {e}")

    def __setitem__(self, key, value):
        """Set the count for *key* to *value* (no-op when disabled)."""
        if not self.enabled or self._db is None:
            return
        try:
            self._db[key] = value
            self._db.sync()
        except Exception as e:
            print(f"Warning: Could not set key '{key}': {e}")