mcp-ticketer 0.12.0__py3-none-any.whl → 2.2.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

This version of mcp-ticketer might be problematic.

Files changed (129)
  1. mcp_ticketer/__init__.py +10 -10
  2. mcp_ticketer/__version__.py +1 -1
  3. mcp_ticketer/_version_scm.py +1 -0
  4. mcp_ticketer/adapters/aitrackdown.py +507 -6
  5. mcp_ticketer/adapters/asana/adapter.py +229 -0
  6. mcp_ticketer/adapters/asana/mappers.py +14 -0
  7. mcp_ticketer/adapters/github/__init__.py +26 -0
  8. mcp_ticketer/adapters/github/adapter.py +3229 -0
  9. mcp_ticketer/adapters/github/client.py +335 -0
  10. mcp_ticketer/adapters/github/mappers.py +797 -0
  11. mcp_ticketer/adapters/github/queries.py +692 -0
  12. mcp_ticketer/adapters/github/types.py +460 -0
  13. mcp_ticketer/adapters/hybrid.py +47 -5
  14. mcp_ticketer/adapters/jira/__init__.py +35 -0
  15. mcp_ticketer/adapters/jira/adapter.py +1351 -0
  16. mcp_ticketer/adapters/jira/client.py +271 -0
  17. mcp_ticketer/adapters/jira/mappers.py +246 -0
  18. mcp_ticketer/adapters/jira/queries.py +216 -0
  19. mcp_ticketer/adapters/jira/types.py +304 -0
  20. mcp_ticketer/adapters/linear/adapter.py +2730 -139
  21. mcp_ticketer/adapters/linear/client.py +175 -3
  22. mcp_ticketer/adapters/linear/mappers.py +203 -8
  23. mcp_ticketer/adapters/linear/queries.py +280 -3
  24. mcp_ticketer/adapters/linear/types.py +120 -4
  25. mcp_ticketer/analysis/__init__.py +56 -0
  26. mcp_ticketer/analysis/dependency_graph.py +255 -0
  27. mcp_ticketer/analysis/health_assessment.py +304 -0
  28. mcp_ticketer/analysis/orphaned.py +218 -0
  29. mcp_ticketer/analysis/project_status.py +594 -0
  30. mcp_ticketer/analysis/similarity.py +224 -0
  31. mcp_ticketer/analysis/staleness.py +266 -0
  32. mcp_ticketer/automation/__init__.py +11 -0
  33. mcp_ticketer/automation/project_updates.py +378 -0
  34. mcp_ticketer/cli/adapter_diagnostics.py +3 -1
  35. mcp_ticketer/cli/auggie_configure.py +17 -5
  36. mcp_ticketer/cli/codex_configure.py +97 -61
  37. mcp_ticketer/cli/configure.py +1288 -105
  38. mcp_ticketer/cli/cursor_configure.py +314 -0
  39. mcp_ticketer/cli/diagnostics.py +13 -12
  40. mcp_ticketer/cli/discover.py +5 -0
  41. mcp_ticketer/cli/gemini_configure.py +17 -5
  42. mcp_ticketer/cli/init_command.py +880 -0
  43. mcp_ticketer/cli/install_mcp_server.py +418 -0
  44. mcp_ticketer/cli/instruction_commands.py +6 -0
  45. mcp_ticketer/cli/main.py +267 -3175
  46. mcp_ticketer/cli/mcp_configure.py +821 -119
  47. mcp_ticketer/cli/mcp_server_commands.py +415 -0
  48. mcp_ticketer/cli/platform_detection.py +77 -12
  49. mcp_ticketer/cli/platform_installer.py +545 -0
  50. mcp_ticketer/cli/project_update_commands.py +350 -0
  51. mcp_ticketer/cli/setup_command.py +795 -0
  52. mcp_ticketer/cli/simple_health.py +12 -10
  53. mcp_ticketer/cli/ticket_commands.py +705 -103
  54. mcp_ticketer/cli/utils.py +113 -0
  55. mcp_ticketer/core/__init__.py +56 -6
  56. mcp_ticketer/core/adapter.py +533 -2
  57. mcp_ticketer/core/config.py +21 -21
  58. mcp_ticketer/core/exceptions.py +7 -1
  59. mcp_ticketer/core/label_manager.py +732 -0
  60. mcp_ticketer/core/mappers.py +31 -19
  61. mcp_ticketer/core/milestone_manager.py +252 -0
  62. mcp_ticketer/core/models.py +480 -0
  63. mcp_ticketer/core/onepassword_secrets.py +1 -1
  64. mcp_ticketer/core/priority_matcher.py +463 -0
  65. mcp_ticketer/core/project_config.py +132 -14
  66. mcp_ticketer/core/project_utils.py +281 -0
  67. mcp_ticketer/core/project_validator.py +376 -0
  68. mcp_ticketer/core/session_state.py +176 -0
  69. mcp_ticketer/core/state_matcher.py +625 -0
  70. mcp_ticketer/core/url_parser.py +425 -0
  71. mcp_ticketer/core/validators.py +69 -0
  72. mcp_ticketer/mcp/server/__main__.py +2 -1
  73. mcp_ticketer/mcp/server/diagnostic_helper.py +175 -0
  74. mcp_ticketer/mcp/server/main.py +106 -25
  75. mcp_ticketer/mcp/server/routing.py +723 -0
  76. mcp_ticketer/mcp/server/server_sdk.py +58 -0
  77. mcp_ticketer/mcp/server/tools/__init__.py +33 -11
  78. mcp_ticketer/mcp/server/tools/analysis_tools.py +854 -0
  79. mcp_ticketer/mcp/server/tools/attachment_tools.py +5 -5
  80. mcp_ticketer/mcp/server/tools/bulk_tools.py +259 -202
  81. mcp_ticketer/mcp/server/tools/comment_tools.py +74 -12
  82. mcp_ticketer/mcp/server/tools/config_tools.py +1391 -145
  83. mcp_ticketer/mcp/server/tools/diagnostic_tools.py +211 -0
  84. mcp_ticketer/mcp/server/tools/hierarchy_tools.py +870 -460
  85. mcp_ticketer/mcp/server/tools/instruction_tools.py +7 -5
  86. mcp_ticketer/mcp/server/tools/label_tools.py +942 -0
  87. mcp_ticketer/mcp/server/tools/milestone_tools.py +338 -0
  88. mcp_ticketer/mcp/server/tools/pr_tools.py +3 -7
  89. mcp_ticketer/mcp/server/tools/project_status_tools.py +158 -0
  90. mcp_ticketer/mcp/server/tools/project_update_tools.py +473 -0
  91. mcp_ticketer/mcp/server/tools/search_tools.py +209 -97
  92. mcp_ticketer/mcp/server/tools/session_tools.py +308 -0
  93. mcp_ticketer/mcp/server/tools/ticket_tools.py +1107 -124
  94. mcp_ticketer/mcp/server/tools/user_ticket_tools.py +218 -236
  95. mcp_ticketer/queue/queue.py +68 -0
  96. mcp_ticketer/queue/worker.py +1 -1
  97. mcp_ticketer/utils/__init__.py +5 -0
  98. mcp_ticketer/utils/token_utils.py +246 -0
  99. mcp_ticketer-2.2.13.dist-info/METADATA +1396 -0
  100. mcp_ticketer-2.2.13.dist-info/RECORD +158 -0
  101. mcp_ticketer-2.2.13.dist-info/top_level.txt +2 -0
  102. py_mcp_installer/examples/phase3_demo.py +178 -0
  103. py_mcp_installer/scripts/manage_version.py +54 -0
  104. py_mcp_installer/setup.py +6 -0
  105. py_mcp_installer/src/py_mcp_installer/__init__.py +153 -0
  106. py_mcp_installer/src/py_mcp_installer/command_builder.py +445 -0
  107. py_mcp_installer/src/py_mcp_installer/config_manager.py +541 -0
  108. py_mcp_installer/src/py_mcp_installer/exceptions.py +243 -0
  109. py_mcp_installer/src/py_mcp_installer/installation_strategy.py +617 -0
  110. py_mcp_installer/src/py_mcp_installer/installer.py +656 -0
  111. py_mcp_installer/src/py_mcp_installer/mcp_inspector.py +750 -0
  112. py_mcp_installer/src/py_mcp_installer/platform_detector.py +451 -0
  113. py_mcp_installer/src/py_mcp_installer/platforms/__init__.py +26 -0
  114. py_mcp_installer/src/py_mcp_installer/platforms/claude_code.py +225 -0
  115. py_mcp_installer/src/py_mcp_installer/platforms/codex.py +181 -0
  116. py_mcp_installer/src/py_mcp_installer/platforms/cursor.py +191 -0
  117. py_mcp_installer/src/py_mcp_installer/types.py +222 -0
  118. py_mcp_installer/src/py_mcp_installer/utils.py +463 -0
  119. py_mcp_installer/tests/__init__.py +0 -0
  120. py_mcp_installer/tests/platforms/__init__.py +0 -0
  121. py_mcp_installer/tests/test_platform_detector.py +17 -0
  122. mcp_ticketer/adapters/github.py +0 -1574
  123. mcp_ticketer/adapters/jira.py +0 -1258
  124. mcp_ticketer-0.12.0.dist-info/METADATA +0 -550
  125. mcp_ticketer-0.12.0.dist-info/RECORD +0 -91
  126. mcp_ticketer-0.12.0.dist-info/top_level.txt +0 -1
  127. {mcp_ticketer-0.12.0.dist-info → mcp_ticketer-2.2.13.dist-info}/WHEEL +0 -0
  128. {mcp_ticketer-0.12.0.dist-info → mcp_ticketer-2.2.13.dist-info}/entry_points.txt +0 -0
  129. {mcp_ticketer-0.12.0.dist-info → mcp_ticketer-2.2.13.dist-info}/licenses/LICENSE +0 -0
mcp_ticketer/core/label_manager.py
@@ -0,0 +1,732 @@
+ """Label management and normalization for ticket systems.
+
+ This module provides intelligent label matching, normalization, and deduplication
+ to maintain consistent labeling across different ticket management platforms.
+
+ Features:
+ - Multi-stage label matching (exact → spelling correction → fuzzy)
+ - Configurable casing strategies (lowercase, titlecase, uppercase, kebab-case, snake_case)
+ - Spelling dictionary for common typos and variations
+ - Fuzzy matching with confidence scoring
+ - Duplicate detection with similarity thresholds
+ - Consolidation suggestions for similar labels
+
+ Design Decision: Three-Stage Matching Pipeline
+ ----------------------------------------------
+ The label matcher uses a cascading approach to maximize accuracy:
+
+ 1. Exact Match: Direct label match with normalized casing (confidence: 1.0)
+ 2. Spelling Correction: Check against common misspellings (confidence: 0.95)
+ 3. Fuzzy Match: Levenshtein distance with thresholds (confidence: 0.70-0.95)
+
+ This approach ensures high confidence for common labels while gracefully handling
+ typos and variations.
+
+ Performance Considerations:
+ - Average match time: <5ms (target: <10ms)
+ - Exact match: O(1) with dict/set lookup
+ - Fuzzy matching: O(n) where n = number of available labels
+ - Memory footprint: <2MB for normalizer instance with 1000 labels
+
+ Trade-offs:
+ - Performance vs. Flexibility: Chose fuzzy matching over ML embeddings for speed
+ - Memory vs. Accuracy: Spelling dictionary trades memory for correction quality
+ - Simplicity vs. Intelligence: Three-stage pipeline balances both
+
+ Example:
+     >>> normalizer = LabelNormalizer(casing="lowercase")
+     >>> result = normalizer.normalize("Bug-Report")
+     >>> print(result)
+     bug-report
+
+     >>> matches = normalizer.find_similar("perfomance", available_labels, threshold=0.8)
+     >>> for match in matches:
+     ...     print(f"{match.label}: {match.confidence}")
+     performance: 0.95
+
+     >>> deduplicator = LabelDeduplicator()
+     >>> duplicates = deduplicator.find_duplicates(labels, threshold=0.85)
+     >>> for label1, label2, score in duplicates:
+     ...     print(f"{label1} ≈ {label2} (similarity: {score:.2f})")
+
+ """
+
+ from __future__ import annotations
+
+ from dataclasses import dataclass
+ from enum import Enum
+
+ try:
+     from rapidfuzz import fuzz
+
+     FUZZY_AVAILABLE = True
+ except ImportError:
+     FUZZY_AVAILABLE = False
+
+
+ class CasingStrategy(str, Enum):
+     """Supported casing strategies for label normalization.
+
+     Attributes:
+         LOWERCASE: Convert to lowercase (e.g., "bug report")
+         TITLECASE: Convert to title case (e.g., "Bug Report")
+         UPPERCASE: Convert to uppercase (e.g., "BUG REPORT")
+         KEBAB_CASE: Convert to kebab-case (e.g., "bug-report")
+         SNAKE_CASE: Convert to snake_case (e.g., "bug_report")
+
+     """
+
+     LOWERCASE = "lowercase"
+     TITLECASE = "titlecase"
+     UPPERCASE = "uppercase"
+     KEBAB_CASE = "kebab-case"
+     SNAKE_CASE = "snake_case"
+
+
+ @dataclass
+ class LabelMatch:
+     """Result of a label matching operation.
+
+     Attributes:
+         label: Matched label string
+         confidence: Confidence score (0.0-1.0)
+         match_type: Type of match used (exact, spelling, fuzzy)
+         original_input: Original user input string
+         suggestions: Alternative matches for ambiguous inputs
+
+     """
+
+     label: str
+     confidence: float
+     match_type: str
+     original_input: str
+     suggestions: list[LabelMatch] | None = None
+
+     def is_high_confidence(self) -> bool:
+         """Check if confidence is high enough for auto-apply."""
+         return self.confidence >= 0.90
+
+     def is_medium_confidence(self) -> bool:
+         """Check if confidence is medium (needs confirmation)."""
+         return 0.70 <= self.confidence < 0.90
+
+     def is_low_confidence(self) -> bool:
+         """Check if confidence is too low (ambiguous)."""
+         return self.confidence < 0.70
+
+
+ class LabelNormalizer:
+     """Label normalizer with configurable casing and spelling correction.
+
+     Normalizes label strings to a consistent format and provides fuzzy matching
+     capabilities with confidence scoring.
+
+     The normalizer supports multiple casing strategies and includes a spelling
+     dictionary for common typos and variations.
+
+     Example:
+         >>> normalizer = LabelNormalizer(casing="kebab-case")
+         >>> print(normalizer.normalize("Bug Report"))
+         bug-report
+
+         >>> available = ["bug", "feature", "performance"]
+         >>> matches = normalizer.find_similar("perfomance", available, threshold=0.8)
+         >>> print(matches[0].label)
+         performance
+
+     """
+
+     # Spelling dictionary: common misspellings → correct spelling
+     SPELLING_CORRECTIONS: dict[str, str] = {
+         # Common typos
+         "feture": "feature",
+         "featrue": "feature",
+         "feautre": "feature",
+         "perfomance": "performance",
+         "peformance": "performance",
+         "performace": "performance",
+         "documention": "documentation",
+         "documentaion": "documentation",
+         "bugfix": "bug-fix",
+         "hotfix": "hot-fix",
+         "enhancment": "enhancement",
+         "improvment": "improvement",
+         "refacor": "refactor",
+         "refactro": "refactor",
+         "secuirty": "security",
+         "securty": "security",
+         "authenciation": "authentication",
+         "authentcation": "authentication",
+         "authorisation": "authorization",
+         "databse": "database",
+         "databae": "database",
+         "backend": "back-end",
+         "frontend": "front-end",
+         "fullstack": "full-stack",
+         # Plural variations (plural → singular)
+         "bugs": "bug",
+         "features": "feature",
+         "enhancements": "enhancement",
+         "improvements": "improvement",
+         "issues": "issue",
+         "tasks": "task",
+         "stories": "story",
+         "epics": "epic",
+         # Common variations
+         "api-endpoint": "api",
+         "ui-bug": "ui",
+         "ux-issue": "ux",
+         "db-migration": "database",
+         "sql-query": "database",
+         "test-case": "testing",
+         "unit-test": "testing",
+         "integration-test": "testing",
+         "e2e-test": "testing",
+         "code-review": "review",
+         "pr-review": "review",
+         "needs-review": "review",
+         # Priority-like labels
+         "urgent": "critical",
+         "high-priority": "high",
+         "low-priority": "low",
+         "blocker": "blocked",
+         "blocking": "blocked",
+     }
+
+     # Confidence thresholds
+     CONFIDENCE_HIGH = 0.90
+     CONFIDENCE_MEDIUM = 0.70
+     FUZZY_THRESHOLD_HIGH = 90
+     FUZZY_THRESHOLD_MEDIUM = 70
+
+     def __init__(self, casing: str = "lowercase") -> None:
+         """Initialize label normalizer with casing strategy.
+
+         Args:
+             casing: Casing strategy - one of: lowercase, titlecase, uppercase,
+                 kebab-case, snake_case (default: lowercase)
+
+         Raises:
+             ValueError: If casing strategy is not supported
+
+         """
+         try:
+             self.casing = CasingStrategy(casing)
+         except ValueError as e:
+             valid_options = ", ".join(c.value for c in CasingStrategy)
+             raise ValueError(
+                 f"Invalid casing strategy '{casing}'. "
+                 f"Valid options: {valid_options}"
+             ) from e
+
+         # Build reverse spelling lookup for O(1) correction
+         self._spelling_map: dict[str, str] = {}
+         for wrong, correct in self.SPELLING_CORRECTIONS.items():
+             normalized_wrong = self._normalize_case(wrong)
+             normalized_correct = self._normalize_case(correct)
+             self._spelling_map[normalized_wrong] = normalized_correct
+
+     def normalize(self, label: str) -> str:
+         """Normalize label to configured casing strategy.
+
+         Args:
+             label: Raw label string to normalize
+
+         Returns:
+             Normalized label string with consistent casing
+
+         Example:
+             >>> normalizer = LabelNormalizer(casing="kebab-case")
+             >>> normalizer.normalize("Bug Report")
+             'bug-report'
+
+             >>> normalizer = LabelNormalizer(casing="snake_case")
+             >>> normalizer.normalize("Bug Report")
+             'bug_report'
+
+         """
+         if not label:
+             return ""
+
+         # Just apply casing strategy (spelling correction only in find_similar)
+         return self._normalize_case(label)
+
+     def find_similar(
+         self,
+         label: str,
+         available_labels: list[str] | set[str],
+         threshold: float = 0.80,
+     ) -> list[LabelMatch]:
+         """Find similar labels from available options using fuzzy matching.
+
+         Uses three-stage matching pipeline:
+         1. Exact match (case-insensitive)
+         2. Spelling correction
+         3. Fuzzy matching with Levenshtein distance
+
+         Args:
+             label: Input label to match
+             available_labels: List of available labels to match against
+             threshold: Minimum similarity threshold (0.0-1.0, default: 0.80)
+
+         Returns:
+             List of LabelMatch objects sorted by confidence (highest first)
+
+         Example:
+             >>> normalizer = LabelNormalizer()
+             >>> available = ["bug", "feature", "performance", "documentation"]
+             >>> matches = normalizer.find_similar("perfomance", available, threshold=0.8)
+             >>> print(matches[0].label, matches[0].confidence)
+             performance 0.95
+
+         """
+         if not label or not available_labels:
+             return []
+
+         normalized_input = self.normalize(label)
+         normalized_available = {self.normalize(lbl): lbl for lbl in available_labels}
+
+         results: list[LabelMatch] = []
+
+         # Stage 1: Exact match (case-insensitive)
+         if normalized_input in normalized_available:
+             results.append(
+                 LabelMatch(
+                     label=normalized_available[normalized_input],
+                     confidence=1.0,
+                     match_type="exact",
+                     original_input=label,
+                 )
+             )
+             return results
+
+         # Stage 2: Spelling correction
+         corrected = self._apply_spelling_correction(normalized_input)
+         if corrected != normalized_input and corrected in normalized_available:
+             results.append(
+                 LabelMatch(
+                     label=normalized_available[corrected],
+                     confidence=0.95,
+                     match_type="spelling",
+                     original_input=label,
+                 )
+             )
+             return results
+
+         # Stage 3: Fuzzy matching
+         if FUZZY_AVAILABLE:
+             results = self._fuzzy_match(
+                 normalized_input, normalized_available, threshold, label
+             )
+
+         return results
+
+     def _normalize_case(self, text: str) -> str:
+         """Apply casing strategy to text.
+
+         Args:
+             text: Text to normalize
+
+         Returns:
+             Text with applied casing strategy
+
+         """
+         text = text.strip()
+
+         if self.casing == CasingStrategy.LOWERCASE:
+             return text.lower()
+         elif self.casing == CasingStrategy.UPPERCASE:
+             return text.upper()
+         elif self.casing == CasingStrategy.TITLECASE:
+             return text.title()
+         elif self.casing == CasingStrategy.KEBAB_CASE:
+             # Replace spaces and underscores with hyphens
+             result = text.lower().replace(" ", "-").replace("_", "-")
+             # Remove duplicate hyphens
+             while "--" in result:
+                 result = result.replace("--", "-")
+             return result
+         elif self.casing == CasingStrategy.SNAKE_CASE:
+             # Replace spaces and hyphens with underscores
+             result = text.lower().replace(" ", "_").replace("-", "_")
+             # Remove duplicate underscores
+             while "__" in result:
+                 result = result.replace("__", "_")
+             return result
+         else:
+             return text.lower()  # Default to lowercase
+
+     def _apply_spelling_correction(self, label: str) -> str:
+         """Apply spelling corrections from dictionary.
+
+         Only corrects if the entire label matches a known misspelling.
+         Does not correct partial matches or compound labels.
+
+         Args:
+             label: Label to correct (should be normalized)
+
+         Returns:
+             Corrected label if found in dictionary, otherwise original
+
+         """
+         # Only apply correction if exact match in spelling map
+         return self._spelling_map.get(label, label)
+
+     def _fuzzy_match(
+         self,
+         normalized_input: str,
+         normalized_available: dict[str, str],
+         threshold: float,
+         original_input: str,
+     ) -> list[LabelMatch]:
+         """Perform fuzzy matching using Levenshtein distance.
+
+         Args:
+             normalized_input: Normalized input label
+             normalized_available: Dict of normalized → original labels
+             threshold: Similarity threshold (0.0-1.0)
+             original_input: Original user input
+
+         Returns:
+             List of LabelMatch objects sorted by confidence
+
+         """
+         matches: list[tuple[str, float]] = []
+
+         for normalized_label, original_label in normalized_available.items():
+             similarity = fuzz.ratio(normalized_input, normalized_label)
+
+             # Convert similarity (0-100) to confidence (0.0-1.0)
+             confidence = similarity / 100.0
+
+             if confidence >= threshold:
+                 matches.append((original_label, confidence))
+
+         # Sort by confidence descending
+         matches.sort(key=lambda x: x[1], reverse=True)
+
+         # Convert to LabelMatch objects
+         return [
+             LabelMatch(
+                 label=lbl,
+                 confidence=conf,
+                 match_type="fuzzy",
+                 original_input=original_input,
+             )
+             for lbl, conf in matches
+         ]
+
+
+ class LabelDeduplicator:
+     """Label deduplicator for finding and consolidating similar labels.
+
+     Identifies duplicate labels using multiple strategies:
+     - Exact duplicates (case-insensitive)
+     - Fuzzy duplicates (Levenshtein similarity)
+     - Plural variations (e.g., "bug" vs "bugs")
+     - Common synonyms (e.g., "bug" vs "issue")
+
+     Example:
+         >>> deduplicator = LabelDeduplicator()
+         >>> labels = ["bug", "Bug", "bugs", "feature", "Feature Request"]
+         >>> duplicates = deduplicator.find_duplicates(labels, threshold=0.85)
+         >>> for label1, label2, score in duplicates:
+         ...     print(f"{label1} ≈ {label2} (similarity: {score:.2f})")
+         bug ≈ Bug (similarity: 1.00)
+         bug ≈ bugs (similarity: 0.93)
+
+         >>> suggestions = deduplicator.suggest_consolidation(labels)
+         >>> for canonical, variants in suggestions.items():
+         ...     print(f"{canonical}: {', '.join(variants)}")
+         bug: Bug, bugs
+
+     """
+
+     # Similarity threshold for considering labels as duplicates
+     DEFAULT_THRESHOLD = 0.85
+
+     # Common label synonyms
+     LABEL_SYNONYMS: dict[str, set[str]] = {
+         "bug": {"issue", "defect", "problem", "error"},
+         "feature": {"enhancement", "improvement", "new feature"},
+         "documentation": {"docs", "doc", "readme"},
+         "testing": {"test", "qa", "quality assurance"},
+         "security": {"vulnerability", "cve", "exploit"},
+         "performance": {"optimization", "speed", "efficiency"},
+         "ui": {"ux", "user interface", "frontend"},
+         "backend": {"back-end", "server", "api"},
+         "database": {"db", "sql", "data"},
+         "refactor": {"refactoring", "cleanup", "tech debt"},
+     }
+
+     def find_duplicates(
+         self,
+         labels: list[str],
+         threshold: float | None = None,
+     ) -> list[tuple[str, str, float]]:
+         """Find duplicate labels with similarity scores.
+
+         Compares all labels pairwise and returns those exceeding the similarity
+         threshold. Results are sorted by similarity score descending.
+
+         Args:
+             labels: List of labels to check for duplicates
+             threshold: Similarity threshold (0.0-1.0, default: 0.85)
+
+         Returns:
+             List of (label1, label2, similarity_score) tuples sorted by score
+
+         Example:
+             >>> deduplicator = LabelDeduplicator()
+             >>> labels = ["bug", "Bug", "bugs", "feature", "feture"]
+             >>> duplicates = deduplicator.find_duplicates(labels)
+             >>> for l1, l2, score in duplicates:
+             ...     print(f"{l1} ≈ {l2}: {score:.2f}")
+             bug ≈ Bug: 1.00
+             bug ≈ bugs: 0.93
+             feature ≈ feture: 0.92
+
+         """
+         if not labels:
+             return []
+
+         threshold = threshold or self.DEFAULT_THRESHOLD
+         duplicates: list[tuple[str, str, float]] = []
+
+         # Compare all pairs
+         for i, label1 in enumerate(labels):
+             for label2 in labels[i + 1 :]:
+                 similarity = self._calculate_similarity(label1, label2)
+                 if similarity >= threshold:
+                     duplicates.append((label1, label2, similarity))
+
+         # Sort by similarity descending
+         duplicates.sort(key=lambda x: x[2], reverse=True)
+
+         return duplicates
+
+     def suggest_consolidation(
+         self,
+         labels: list[str],
+         threshold: float | None = None,
+     ) -> dict[str, list[str]]:
+         """Suggest label consolidations for similar labels.
+
+         Groups similar labels together and suggests a canonical label for each group.
+         The canonical label is typically the most common or shortest variant.
+
+         Args:
+             labels: List of labels to consolidate
+             threshold: Similarity threshold (0.0-1.0, default: 0.85)
+
+         Returns:
+             Dictionary mapping canonical label → list of similar variants
+
+         Example:
+             >>> deduplicator = LabelDeduplicator()
+             >>> labels = ["bug", "Bug", "bugs", "feature", "feture", "features"]
+             >>> suggestions = deduplicator.suggest_consolidation(labels)
+             >>> for canonical, variants in suggestions.items():
+             ...     print(f"Use '{canonical}' instead of: {', '.join(variants)}")
+             Use 'bug' instead of: Bug, bugs
+             Use 'feature' instead of: feture, features
+
+         """
+         if not labels:
+             return {}
+
+         threshold = threshold or self.DEFAULT_THRESHOLD
+         duplicates = self.find_duplicates(labels, threshold)
+
+         # Build graph of similar labels
+         similarity_graph: dict[str, set[str]] = {label: set() for label in labels}
+
+         for label1, label2, _ in duplicates:
+             similarity_graph[label1].add(label2)
+             similarity_graph[label2].add(label1)
+
+         # Find connected components (groups of similar labels)
+         visited: set[str] = set()
+         groups: list[set[str]] = []
+
+         for label in labels:
+             if label in visited:
+                 continue
+
+             # BFS to find connected component
+             group = self._find_connected_component(label, similarity_graph)
+             groups.append(group)
+             visited.update(group)
+
+         # Select canonical label for each group
+         consolidations: dict[str, list[str]] = {}
+
+         for group in groups:
+             if len(group) <= 1:
+                 continue  # No duplicates
+
+             # Choose canonical label (prefer lowercase, then shortest)
+             canonical = min(group, key=lambda x: (not x.islower(), len(x), x))
+
+             variants = [lbl for lbl in group if lbl != canonical]
+             if variants:
+                 consolidations[canonical] = variants
+
+         return consolidations
+
+     def _calculate_similarity(self, label1: str, label2: str) -> float:
+         """Calculate similarity score between two labels.
+
+         Uses multiple similarity checks:
+         1. Case-insensitive exact match → 1.0
+         2. Synonym match → 0.95
+         3. Fuzzy matching (if available) → 0.0-1.0
+
+         Args:
+             label1: First label
+             label2: Second label
+
+         Returns:
+             Similarity score (0.0-1.0)
+
+         """
+         # Normalize for comparison
+         norm1 = label1.lower().strip()
+         norm2 = label2.lower().strip()
+
+         # Exact match (case-insensitive)
+         if norm1 == norm2:
+             return 1.0
+
+         # Check synonyms
+         if self._are_synonyms(norm1, norm2):
+             return 0.95
+
+         # Fuzzy matching
+         if FUZZY_AVAILABLE:
+             similarity = fuzz.ratio(norm1, norm2)
+             return similarity / 100.0
+
+         # Fallback: simple string comparison
+         return 1.0 if norm1 == norm2 else 0.0
+
+     def _are_synonyms(self, label1: str, label2: str) -> bool:
+         """Check if two labels are synonyms.
+
+         Args:
+             label1: First label (normalized)
+             label2: Second label (normalized)
+
+         Returns:
+             True if labels are synonyms, False otherwise
+
+         """
+         for canonical, synonyms in self.LABEL_SYNONYMS.items():
+             if label1 == canonical and label2 in synonyms:
+                 return True
+             if label2 == canonical and label1 in synonyms:
+                 return True
+             if label1 in synonyms and label2 in synonyms:
+                 return True
+
+         return False
+
+     def _find_connected_component(
+         self,
+         start: str,
+         graph: dict[str, set[str]],
+     ) -> set[str]:
+         """Find connected component in similarity graph using BFS.
+
+         Args:
+             start: Starting label
+             graph: Adjacency list of label similarities
+
+         Returns:
+             Set of labels in the connected component
+
+         """
+         visited = {start}
+         queue = [start]
+
+         while queue:
+             label = queue.pop(0)
+
+             for neighbor in graph[label]:
+                 if neighbor not in visited:
+                     visited.add(neighbor)
+                     queue.append(neighbor)
+
+         return visited
+
+
+ # Convenience functions for common operations
+
+
+ def normalize_label(label: str, casing: str = "lowercase") -> str:
+     """Normalize a single label with specified casing strategy.
+
+     Convenience function that creates a LabelNormalizer instance.
+
+     Args:
+         label: Label to normalize
+         casing: Casing strategy (default: lowercase)
+
+     Returns:
+         Normalized label string
+
+     Example:
+         >>> normalize_label("Bug Report", casing="kebab-case")
+         'bug-report'
+
+     """
+     normalizer = LabelNormalizer(casing=casing)
+     return normalizer.normalize(label)
+
+
+ def find_duplicate_labels(
+     labels: list[str],
+     threshold: float = 0.85,
+ ) -> list[tuple[str, str, float]]:
+     """Find duplicate labels in a list.
+
+     Convenience function that creates a LabelDeduplicator instance.
+
+     Args:
+         labels: List of labels to check
+         threshold: Similarity threshold (default: 0.85)
+
+     Returns:
+         List of (label1, label2, similarity_score) tuples
+
+     Example:
+         >>> labels = ["bug", "Bug", "bugs", "feature"]
+         >>> duplicates = find_duplicate_labels(labels)
+         >>> for l1, l2, score in duplicates:
+         ...     print(f"{l1} ≈ {l2}: {score:.2f}")
+
+     """
+     deduplicator = LabelDeduplicator()
+     return deduplicator.find_duplicates(labels, threshold)
+
+
+ # Singleton instance for convenience
+ _default_normalizer: LabelNormalizer | None = None
+
+
+ def get_label_normalizer(casing: str = "lowercase") -> LabelNormalizer:
+     """Get default label normalizer instance.
+
+     Creates or returns cached normalizer with specified casing.
+
+     Args:
+         casing: Casing strategy (default: lowercase)
+
+     Returns:
+         LabelNormalizer instance
+
+     """
+     global _default_normalizer
+     if _default_normalizer is None or _default_normalizer.casing.value != casing:
+         _default_normalizer = LabelNormalizer(casing=casing)
+     return _default_normalizer
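
To make the new surface area concrete, here is a minimal usage sketch of the label_manager module added in this release (hypothetical consumer code, not part of the diff; the import path follows the file list above, the triage cutoffs mirror LabelMatch.is_high_confidence / is_medium_confidence, and the fuzzy branch assumes the optional rapidfuzz dependency is installed):

from mcp_ticketer.core.label_manager import LabelDeduplicator, LabelNormalizer

normalizer = LabelNormalizer(casing="kebab-case")
available = ["bug", "performance", "documentation", "front-end"]

# Triage inputs through the three-stage pipeline: high-confidence matches
# (>= 0.90, e.g. exact or spelling hits) auto-apply, medium ones (0.70-0.90)
# need confirmation, and unmatched inputs become new normalized labels.
for raw in ["BUG", "perfomance", "bugg", "Bug Report"]:
    matches = normalizer.find_similar(raw, available, threshold=0.8)
    if matches and matches[0].is_high_confidence():
        print(f"{raw!r}: auto-apply {matches[0].label!r} ({matches[0].match_type})")
    elif matches:
        print(f"{raw!r}: confirm {matches[0].label!r} ({matches[0].confidence:.2f})")
    else:
        print(f"{raw!r}: no match, create as {normalizer.normalize(raw)!r}")

# Collapse near-duplicate labels into a canonical form.
deduplicator = LabelDeduplicator()
for canonical, variants in deduplicator.suggest_consolidation(["bug", "Bug", "bugs"]).items():
    print(f"use {canonical!r} instead of {variants}")

With these inputs, "BUG" resolves via the exact stage (confidence 1.0), "perfomance" via the spelling dictionary (0.95), "bugg" via fuzzy matching at medium confidence (≈0.86), and "Bug Report" falls through to label creation.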