python-oop-analyzer 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,526 @@
1
+ """
2
+ Dictionary Usage Rule.
3
+
4
+ This rule detects dictionary usage that should be replaced by proper objects
5
+ like dataclasses, Pydantic models, NamedTuples, or custom classes.
6
+
7
+ Dictionaries are acceptable for:
8
+ - Parsing RPC/REST API responses (at the boundary)
9
+ - Dynamic key-value storage where keys are truly dynamic
10
+ - Temporary data transformation
11
+
12
+ Dictionaries should NOT be used for:
13
+ - Passing data between abstraction layers
14
+ - Function parameters representing structured data
15
+ - Return values representing domain objects
16
+ - Class attributes storing structured data
17
+ """
18
+
19
+ import ast
20
+ from typing import Any
21
+
22
+ from .base import BaseRule, RuleResult, RuleViolation
23
+
24
+
25
class DictionaryUsageRule(BaseRule):
    """
    Flag dictionaries that are standing in for proper objects.

    The rule hands the parsed module to a ``DictionaryUsageVisitor`` and
    repackages what the visitor collected into a ``RuleResult``.

    Recognised options (all optional, read from ``self.options``):
    - ``min_dict_keys`` (default 2): minimum number of string keys before a
      literal or an access pattern is reported.
    - ``check_return_dicts`` (default True)
    - ``check_dict_params`` (default True)
    - ``check_dict_access`` (default True)
    - ``allow_api_boundaries`` (default True): skip functions/classes whose
      name contains one of ``API_BOUNDARY_PATTERNS``.
    """

    name = "dictionary_usage"
    description = "Detect dictionary usage that should be objects"
    severity = "warning"

    # Name fragments that suggest acceptable dict usage (API boundaries).
    API_BOUNDARY_PATTERNS = {
        "response",
        "request",
        "payload",
        "json",
        "data",
        "body",
        "parse",
        "serialize",
        "deserialize",
        "to_dict",
        "from_dict",
        "to_json",
        "from_json",
        "api",
        "http",
        "rest",
        "rpc",
    }

    def __init__(self, options: dict[str, Any] | None = None):
        """Store the rule options, falling back to the documented defaults."""
        super().__init__(options)
        opts = self.options
        self.min_dict_keys = opts.get("min_dict_keys", 2)
        self.check_return_dicts = opts.get("check_return_dicts", True)
        self.check_dict_params = opts.get("check_dict_params", True)
        self.check_dict_access = opts.get("check_dict_access", True)
        self.allow_api_boundaries = opts.get("allow_api_boundaries", True)

    def analyze(
        self,
        tree: ast.Module,
        source: str,
        file_path: str,
    ) -> RuleResult:
        """Walk ``tree`` and report the dictionary usage patterns found in it."""
        walker = DictionaryUsageVisitor(
            file_path=file_path,
            source=source,
            min_dict_keys=self.min_dict_keys,
            check_return_dicts=self.check_return_dicts,
            check_dict_params=self.check_dict_params,
            check_dict_access=self.check_dict_access,
            allow_api_boundaries=self.allow_api_boundaries,
            api_boundary_patterns=self.API_BOUNDARY_PATTERNS,
        )
        walker.visit(tree)

        # Per-category counters maintained by the visitor while it walked.
        counts = {
            "total_dict_violations": len(walker.violations),
            "dict_return_violations": walker.dict_return_count,
            "dict_param_violations": walker.dict_param_count,
            "dict_access_violations": walker.dict_access_count,
            "dict_literal_violations": walker.dict_literal_count,
        }
        return RuleResult(
            rule_name=self.name,
            violations=walker.violations,
            summary=counts,
            metadata={"patterns": walker.patterns},
        )
103
+
104
+
105
+ class DictionaryUsageVisitor(ast.NodeVisitor):
106
+ """AST visitor that detects problematic dictionary usage."""
107
+
108
+ def __init__(
109
+ self,
110
+ file_path: str,
111
+ source: str,
112
+ min_dict_keys: int = 2,
113
+ check_return_dicts: bool = True,
114
+ check_dict_params: bool = True,
115
+ check_dict_access: bool = True,
116
+ allow_api_boundaries: bool = True,
117
+ api_boundary_patterns: set[str] | None = None,
118
+ ):
119
+ self.file_path = file_path
120
+ self.source = source
121
+ self.min_dict_keys = min_dict_keys
122
+ self.check_return_dicts = check_return_dicts
123
+ self.check_dict_params = check_dict_params
124
+ self.check_dict_access = check_dict_access
125
+ self.allow_api_boundaries = allow_api_boundaries
126
+ self.api_boundary_patterns = api_boundary_patterns or set()
127
+
128
+ self.violations: list[RuleViolation] = []
129
+ self.patterns: list[dict[str, Any]] = []
130
+ self.dict_return_count = 0
131
+ self.dict_param_count = 0
132
+ self.dict_access_count = 0
133
+ self.dict_literal_count = 0
134
+
135
+ self._current_function: str | None = None
136
+ self._current_class: str | None = None
137
+ self._dict_key_accesses: dict[str, list[str]] = {} # var_name -> [keys accessed]
138
+
139
+ def visit_ClassDef(self, node: ast.ClassDef) -> None:
140
+ """Track class context."""
141
+ old_class = self._current_class
142
+ self._current_class = node.name
143
+ self.generic_visit(node)
144
+ self._current_class = old_class
145
+
146
+ def visit_FunctionDef(self, node: ast.FunctionDef) -> None:
147
+ """Analyze function for dictionary patterns."""
148
+ old_function = self._current_function
149
+ self._current_function = node.name
150
+ self._dict_key_accesses = {}
151
+
152
+ # Check if this is an API boundary function
153
+ is_api_boundary = self._is_api_boundary_context(node.name)
154
+
155
+ # Check return type hint for Dict[str, Any]
156
+ if self.check_return_dicts and not is_api_boundary:
157
+ self._check_return_type_hint(node)
158
+
159
+ # Check parameters for dict type hints
160
+ if self.check_dict_params and not is_api_boundary:
161
+ self._check_param_type_hints(node)
162
+
163
+ self.generic_visit(node)
164
+
165
+ # After visiting, check for repeated dict key access
166
+ if self.check_dict_access and not is_api_boundary:
167
+ self._check_dict_key_access_patterns(node)
168
+
169
+ self._current_function = old_function
170
+
171
+ def visit_AsyncFunctionDef(self, node: ast.AsyncFunctionDef) -> None:
172
+ """Handle async functions."""
173
+ old_function = self._current_function
174
+ self._current_function = node.name
175
+ self._dict_key_accesses = {}
176
+
177
+ is_api_boundary = self._is_api_boundary_context(node.name)
178
+
179
+ if self.check_return_dicts and not is_api_boundary:
180
+ self._check_return_type_hint(node)
181
+
182
+ if self.check_dict_params and not is_api_boundary:
183
+ self._check_param_type_hints(node)
184
+
185
+ self.generic_visit(node)
186
+
187
+ if self.check_dict_access and not is_api_boundary:
188
+ self._check_dict_key_access_patterns(node)
189
+
190
+ self._current_function = old_function
191
+
192
+ def visit_Return(self, node: ast.Return) -> None:
193
+ """Check return statements for dict literals."""
194
+ if not self.check_return_dicts:
195
+ self.generic_visit(node)
196
+ return
197
+
198
+ if self._is_api_boundary_context(self._current_function):
199
+ self.generic_visit(node)
200
+ return
201
+
202
+ if node.value and isinstance(node.value, ast.Dict):
203
+ keys = self._extract_dict_keys(node.value)
204
+ if len(keys) >= self.min_dict_keys:
205
+ self._add_dict_return_violation(node, keys)
206
+
207
+ self.generic_visit(node)
208
+
209
+ def visit_Subscript(self, node: ast.Subscript) -> None:
210
+ """Track dict key access patterns like data["key"]."""
211
+ if isinstance(node.slice, ast.Constant) and isinstance(node.slice.value, str):
212
+ if isinstance(node.value, ast.Name):
213
+ var_name = node.value.id
214
+ key = node.slice.value
215
+ if var_name not in self._dict_key_accesses:
216
+ self._dict_key_accesses[var_name] = []
217
+ self._dict_key_accesses[var_name].append(key)
218
+
219
+ self.generic_visit(node)
220
+
221
+ def visit_Assign(self, node: ast.Assign) -> None:
222
+ """Check assignments of dict literals."""
223
+ if isinstance(node.value, ast.Dict):
224
+ keys = self._extract_dict_keys(node.value)
225
+ if len(keys) >= self.min_dict_keys:
226
+ # Check if it's being assigned to a variable (not returned)
227
+ if not self._is_api_boundary_context(self._current_function):
228
+ self._add_dict_literal_violation(node, keys)
229
+
230
+ self.generic_visit(node)
231
+
232
+ def _is_api_boundary_context(self, name: str | None) -> bool:
233
+ """Check if the current context suggests API boundary."""
234
+ if not self.allow_api_boundaries or not name:
235
+ return False
236
+
237
+ name_lower = name.lower()
238
+ for pattern in self.api_boundary_patterns:
239
+ if pattern in name_lower:
240
+ return True
241
+
242
+ # Also check class name
243
+ if self._current_class:
244
+ class_lower = self._current_class.lower()
245
+ for pattern in self.api_boundary_patterns:
246
+ if pattern in class_lower:
247
+ return True
248
+
249
+ return False
250
+
251
+ def _check_return_type_hint(
252
+ self,
253
+ node: ast.FunctionDef | ast.AsyncFunctionDef,
254
+ ) -> None:
255
+ """Check if return type hint uses Dict[str, Any] or similar."""
256
+ if node.returns and self._is_dict_type_hint(node.returns):
257
+ self._add_dict_type_hint_violation(node, "return")
258
+
259
+ def _check_param_type_hints(
260
+ self,
261
+ node: ast.FunctionDef | ast.AsyncFunctionDef,
262
+ ) -> None:
263
+ """Check if parameters use Dict type hints."""
264
+ for arg in node.args.args:
265
+ if arg.annotation and self._is_dict_type_hint(arg.annotation):
266
+ # Skip 'self' and common acceptable patterns
267
+ if arg.arg not in ("self", "cls", "kwargs", "options", "config"):
268
+ self._add_dict_param_violation(node, arg)
269
+
270
+ def _is_dict_type_hint(self, node: ast.expr) -> bool:
271
+ """Check if a type hint represents a dict type."""
272
+ # dict or Dict
273
+ if isinstance(node, ast.Name):
274
+ return node.id in ("dict", "Dict")
275
+
276
+ # Dict[str, Any] or dict[str, Any]
277
+ if isinstance(node, ast.Subscript):
278
+ if isinstance(node.value, ast.Name):
279
+ return node.value.id in ("dict", "Dict")
280
+ if isinstance(node.value, ast.Attribute):
281
+ return node.value.attr in ("Dict",)
282
+
283
+ return False
284
+
285
+ def _extract_dict_keys(self, node: ast.Dict) -> list[str]:
286
+ """Extract string keys from a dict literal."""
287
+ keys: list[str] = []
288
+ for key in node.keys:
289
+ if isinstance(key, ast.Constant) and isinstance(key.value, str):
290
+ keys.append(key.value)
291
+ return keys
292
+
293
+ def _check_dict_key_access_patterns(
294
+ self,
295
+ node: ast.FunctionDef | ast.AsyncFunctionDef,
296
+ ) -> None:
297
+ """Check for repeated dict key access suggesting structured data."""
298
+ for var_name, keys in self._dict_key_accesses.items():
299
+ unique_keys = set(keys)
300
+ if len(unique_keys) >= self.min_dict_keys:
301
+ self._add_dict_access_violation(node, var_name, list(unique_keys))
302
+
303
+ def _add_dict_return_violation(
304
+ self,
305
+ node: ast.Return,
306
+ keys: list[str],
307
+ ) -> None:
308
+ """Add violation for returning a dict literal."""
309
+ self.dict_return_count += 1
310
+ keys_str = ", ".join(f"'{k}'" for k in keys[:5])
311
+ if len(keys) > 5:
312
+ keys_str += ", ..."
313
+
314
+ self.violations.append(
315
+ RuleViolation(
316
+ rule_name="dictionary_usage",
317
+ message=(
318
+ f"Function '{self._current_function}' returns a dict literal with "
319
+ f"fixed keys [{keys_str}]. Consider using a dataclass or typed object."
320
+ ),
321
+ file_path=self.file_path,
322
+ line=node.lineno,
323
+ column=node.col_offset,
324
+ severity="warning",
325
+ suggestion=(
326
+ "Replace the dictionary with a dataclass, NamedTuple, or Pydantic model. "
327
+ "This provides type safety, IDE support, and clearer API contracts."
328
+ ),
329
+ code_snippet=self._get_source_line(node.lineno),
330
+ metadata={
331
+ "pattern": "dict_return",
332
+ "keys": keys,
333
+ "function": self._current_function,
334
+ "class": self._current_class,
335
+ },
336
+ )
337
+ )
338
+ self.patterns.append(
339
+ {
340
+ "type": "dict_return",
341
+ "line": node.lineno,
342
+ "keys": keys,
343
+ }
344
+ )
345
+
346
+ def _add_dict_literal_violation(
347
+ self,
348
+ node: ast.Assign,
349
+ keys: list[str],
350
+ ) -> None:
351
+ """Add violation for assigning a dict literal with fixed keys."""
352
+ self.dict_literal_count += 1
353
+ keys_str = ", ".join(f"'{k}'" for k in keys[:5])
354
+ if len(keys) > 5:
355
+ keys_str += ", ..."
356
+
357
+ # Get the variable name if possible
358
+ var_name = "<variable>"
359
+ if node.targets and isinstance(node.targets[0], ast.Name):
360
+ var_name = node.targets[0].id
361
+
362
+ self.violations.append(
363
+ RuleViolation(
364
+ rule_name="dictionary_usage",
365
+ message=(
366
+ f"Dict literal assigned to '{var_name}' with fixed keys [{keys_str}]. "
367
+ f"Consider using a dataclass or typed object instead."
368
+ ),
369
+ file_path=self.file_path,
370
+ line=node.lineno,
371
+ column=node.col_offset,
372
+ severity="info",
373
+ suggestion=(
374
+ "If this dictionary represents structured data with known keys, "
375
+ "consider using a dataclass or NamedTuple for better type safety."
376
+ ),
377
+ code_snippet=self._get_source_line(node.lineno),
378
+ metadata={
379
+ "pattern": "dict_literal",
380
+ "keys": keys,
381
+ "variable": var_name,
382
+ "function": self._current_function,
383
+ "class": self._current_class,
384
+ },
385
+ )
386
+ )
387
+ self.patterns.append(
388
+ {
389
+ "type": "dict_literal",
390
+ "line": node.lineno,
391
+ "keys": keys,
392
+ }
393
+ )
394
+
395
+ def _add_dict_type_hint_violation(
396
+ self,
397
+ node: ast.FunctionDef | ast.AsyncFunctionDef,
398
+ context: str,
399
+ ) -> None:
400
+ """Add violation for using Dict type hint."""
401
+ self.dict_param_count += 1
402
+
403
+ self.violations.append(
404
+ RuleViolation(
405
+ rule_name="dictionary_usage",
406
+ message=(
407
+ f"Function '{node.name}' uses Dict type hint for {context}. "
408
+ f"Consider using a typed object instead."
409
+ ),
410
+ file_path=self.file_path,
411
+ line=node.lineno,
412
+ column=node.col_offset,
413
+ severity="info",
414
+ suggestion=(
415
+ "Using Dict[str, Any] loses type information. Consider defining "
416
+ "a dataclass, TypedDict, or Pydantic model for structured data."
417
+ ),
418
+ code_snippet=self._get_source_line(node.lineno),
419
+ metadata={
420
+ "pattern": "dict_type_hint",
421
+ "context": context,
422
+ "function": node.name,
423
+ "class": self._current_class,
424
+ },
425
+ )
426
+ )
427
+ self.patterns.append(
428
+ {
429
+ "type": "dict_type_hint",
430
+ "line": node.lineno,
431
+ "context": context,
432
+ }
433
+ )
434
+
435
+ def _add_dict_param_violation(
436
+ self,
437
+ node: ast.FunctionDef | ast.AsyncFunctionDef,
438
+ arg: ast.arg,
439
+ ) -> None:
440
+ """Add violation for dict parameter type hint."""
441
+ self.dict_param_count += 1
442
+
443
+ self.violations.append(
444
+ RuleViolation(
445
+ rule_name="dictionary_usage",
446
+ message=(
447
+ f"Parameter '{arg.arg}' in function '{node.name}' uses Dict type hint. "
448
+ f"Consider using a typed object for structured data."
449
+ ),
450
+ file_path=self.file_path,
451
+ line=arg.lineno,
452
+ column=arg.col_offset,
453
+ severity="warning",
454
+ suggestion=(
455
+ f"Instead of passing a dict, define a dataclass or Pydantic model "
456
+ f"that represents the expected structure of '{arg.arg}'."
457
+ ),
458
+ code_snippet=self._get_source_line(node.lineno),
459
+ metadata={
460
+ "pattern": "dict_param",
461
+ "parameter": arg.arg,
462
+ "function": node.name,
463
+ "class": self._current_class,
464
+ },
465
+ )
466
+ )
467
+ self.patterns.append(
468
+ {
469
+ "type": "dict_param",
470
+ "line": arg.lineno,
471
+ "parameter": arg.arg,
472
+ }
473
+ )
474
+
475
+ def _add_dict_access_violation(
476
+ self,
477
+ node: ast.FunctionDef | ast.AsyncFunctionDef,
478
+ var_name: str,
479
+ keys: list[str],
480
+ ) -> None:
481
+ """Add violation for repeated dict key access."""
482
+ self.dict_access_count += 1
483
+ keys_str = ", ".join(f"'{k}'" for k in keys[:5])
484
+ if len(keys) > 5:
485
+ keys_str += ", ..."
486
+
487
+ self.violations.append(
488
+ RuleViolation(
489
+ rule_name="dictionary_usage",
490
+ message=(
491
+ f"Variable '{var_name}' accessed with multiple string keys [{keys_str}] "
492
+ f"in function '{node.name}'. This suggests structured data."
493
+ ),
494
+ file_path=self.file_path,
495
+ line=node.lineno,
496
+ column=node.col_offset,
497
+ severity="info",
498
+ suggestion=(
499
+ f"If '{var_name}' has a known structure, consider converting it to "
500
+ f"a dataclass or typed object for better type safety and IDE support."
501
+ ),
502
+ code_snippet=self._get_source_line(node.lineno),
503
+ metadata={
504
+ "pattern": "dict_access",
505
+ "variable": var_name,
506
+ "keys": keys,
507
+ "function": node.name,
508
+ "class": self._current_class,
509
+ },
510
+ )
511
+ )
512
+ self.patterns.append(
513
+ {
514
+ "type": "dict_access",
515
+ "line": node.lineno,
516
+ "variable": var_name,
517
+ "keys": keys,
518
+ }
519
+ )
520
+
521
+ def _get_source_line(self, line_number: int) -> str:
522
+ """Get a specific line from the source code."""
523
+ lines = self.source.splitlines()
524
+ if 1 <= line_number <= len(lines):
525
+ return lines[line_number - 1].strip()
526
+ return ""