codeshift 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (65) hide show
  1. codeshift/__init__.py +8 -0
  2. codeshift/analyzer/__init__.py +5 -0
  3. codeshift/analyzer/risk_assessor.py +388 -0
  4. codeshift/api/__init__.py +1 -0
  5. codeshift/api/auth.py +182 -0
  6. codeshift/api/config.py +73 -0
  7. codeshift/api/database.py +215 -0
  8. codeshift/api/main.py +103 -0
  9. codeshift/api/models/__init__.py +55 -0
  10. codeshift/api/models/auth.py +108 -0
  11. codeshift/api/models/billing.py +92 -0
  12. codeshift/api/models/migrate.py +42 -0
  13. codeshift/api/models/usage.py +116 -0
  14. codeshift/api/routers/__init__.py +5 -0
  15. codeshift/api/routers/auth.py +440 -0
  16. codeshift/api/routers/billing.py +395 -0
  17. codeshift/api/routers/migrate.py +304 -0
  18. codeshift/api/routers/usage.py +291 -0
  19. codeshift/api/routers/webhooks.py +289 -0
  20. codeshift/cli/__init__.py +5 -0
  21. codeshift/cli/commands/__init__.py +7 -0
  22. codeshift/cli/commands/apply.py +352 -0
  23. codeshift/cli/commands/auth.py +842 -0
  24. codeshift/cli/commands/diff.py +221 -0
  25. codeshift/cli/commands/scan.py +368 -0
  26. codeshift/cli/commands/upgrade.py +436 -0
  27. codeshift/cli/commands/upgrade_all.py +518 -0
  28. codeshift/cli/main.py +221 -0
  29. codeshift/cli/quota.py +210 -0
  30. codeshift/knowledge/__init__.py +50 -0
  31. codeshift/knowledge/cache.py +167 -0
  32. codeshift/knowledge/generator.py +231 -0
  33. codeshift/knowledge/models.py +151 -0
  34. codeshift/knowledge/parser.py +270 -0
  35. codeshift/knowledge/sources.py +388 -0
  36. codeshift/knowledge_base/__init__.py +17 -0
  37. codeshift/knowledge_base/loader.py +102 -0
  38. codeshift/knowledge_base/models.py +110 -0
  39. codeshift/migrator/__init__.py +23 -0
  40. codeshift/migrator/ast_transforms.py +256 -0
  41. codeshift/migrator/engine.py +395 -0
  42. codeshift/migrator/llm_migrator.py +320 -0
  43. codeshift/migrator/transforms/__init__.py +19 -0
  44. codeshift/migrator/transforms/fastapi_transformer.py +174 -0
  45. codeshift/migrator/transforms/pandas_transformer.py +236 -0
  46. codeshift/migrator/transforms/pydantic_v1_to_v2.py +637 -0
  47. codeshift/migrator/transforms/requests_transformer.py +218 -0
  48. codeshift/migrator/transforms/sqlalchemy_transformer.py +175 -0
  49. codeshift/scanner/__init__.py +6 -0
  50. codeshift/scanner/code_scanner.py +352 -0
  51. codeshift/scanner/dependency_parser.py +473 -0
  52. codeshift/utils/__init__.py +5 -0
  53. codeshift/utils/api_client.py +266 -0
  54. codeshift/utils/cache.py +318 -0
  55. codeshift/utils/config.py +71 -0
  56. codeshift/utils/llm_client.py +221 -0
  57. codeshift/validator/__init__.py +6 -0
  58. codeshift/validator/syntax_checker.py +183 -0
  59. codeshift/validator/test_runner.py +224 -0
  60. codeshift-0.2.0.dist-info/METADATA +326 -0
  61. codeshift-0.2.0.dist-info/RECORD +65 -0
  62. codeshift-0.2.0.dist-info/WHEEL +5 -0
  63. codeshift-0.2.0.dist-info/entry_points.txt +2 -0
  64. codeshift-0.2.0.dist-info/licenses/LICENSE +21 -0
  65. codeshift-0.2.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,231 @@
1
+ """Knowledge base generator - orchestrates the knowledge acquisition pipeline."""
2
+
3
+ from collections.abc import Callable
4
+
5
+ from codeshift.knowledge.cache import KnowledgeCache, get_knowledge_cache
6
+ from codeshift.knowledge.models import (
7
+ BreakingChange,
8
+ Confidence,
9
+ GeneratedKnowledgeBase,
10
+ )
11
+ from codeshift.knowledge.parser import ChangelogParser, get_changelog_parser
12
+ from codeshift.knowledge.sources import SourceFetcher, get_source_fetcher
13
+
14
+
15
class KnowledgeGenerator:
    """Orchestrates knowledge base generation from multiple sources.

    The pipeline implemented by :meth:`generate` is: cache lookup, source
    discovery, per-version changelog extraction, LLM parsing, confidence
    scoring and finally a cache store.
    """

    def __init__(
        self,
        fetcher: SourceFetcher | None = None,
        parser: ChangelogParser | None = None,
        cache: KnowledgeCache | None = None,
        use_cache: bool = True,
    ):
        """Initialize the generator.

        Args:
            fetcher: Source fetcher instance. Defaults to the shared fetcher.
            parser: Changelog parser instance. Defaults to the shared parser.
            cache: Knowledge cache instance. Defaults to the shared cache.
                Ignored when ``use_cache`` is False.
            use_cache: Whether to use caching.
        """
        # Compare against None explicitly: a collaborator that happens to be
        # falsy (e.g. an empty container-like cache) must not be swapped for
        # the default instance.
        self.fetcher = fetcher if fetcher is not None else get_source_fetcher()
        self.parser = parser if parser is not None else get_changelog_parser()

        if not use_cache:
            self.cache = None
        elif cache is not None:
            self.cache = cache
        else:
            self.cache = get_knowledge_cache()
        self.use_cache = use_cache

    def generate(
        self,
        package: str,
        old_version: str,
        new_version: str,
        progress_callback: Callable[[str], None] | None = None,
    ) -> GeneratedKnowledgeBase:
        """Generate a knowledge base for a package migration.

        Args:
            package: Package name.
            old_version: Starting version.
            new_version: Target version.
            progress_callback: Optional callback for progress updates.

        Returns:
            GeneratedKnowledgeBase with detected breaking changes. When no
            sources are found, an empty low-confidence knowledge base is
            returned and nothing is written to the cache.
        """

        def report(msg: str) -> None:
            if progress_callback is not None:
                progress_callback(msg)

        caching_enabled = self.use_cache and self.cache is not None

        # Check cache first
        if caching_enabled:
            cached = self.cache.get(package, old_version, new_version)
            if cached is not None:
                report("Using cached knowledge base")
                return cached

        report("Fetching changelog sources...")

        # Discover sources
        sources = self.fetcher.discover_sources_sync(package, new_version)

        if not sources:
            report("No changelog sources found")
            return GeneratedKnowledgeBase(
                package=package,
                old_version=old_version,
                new_version=new_version,
                overall_confidence=Confidence.LOW,
            )

        source_urls = [s.url for s in sources]
        report(f"Found {len(sources)} source(s)")

        # Narrow changelog sources down to the requested version range.
        # Note that this rewrites ``source.content`` in place.
        for source in sources:
            if source.source_type == "changelog":
                source.content = self.fetcher.extract_version_changelog(
                    source.content,
                    old_version,
                    new_version,
                )

        # Parse sources with LLM
        breaking_changes: list[BreakingChange] = []

        if self.parser.is_available:
            report("Parsing changelog with LLM...")
            breaking_changes = self.parser.parse_multiple_sources(
                sources,
                package,
                old_version,
                new_version,
            )
            report(f"Found {len(breaking_changes)} breaking change(s)")
        else:
            report("LLM not available - skipping changelog parsing")

        # Determine overall confidence
        overall_confidence = self._calculate_overall_confidence(breaking_changes, sources)

        # Create knowledge base
        kb = GeneratedKnowledgeBase(
            package=package,
            old_version=old_version,
            new_version=new_version,
            breaking_changes=breaking_changes,
            sources=source_urls,
            overall_confidence=overall_confidence,
        )

        # Cache result
        if caching_enabled:
            self.cache.set(kb)
            report("Cached knowledge base")

        return kb

    def _calculate_overall_confidence(
        self,
        changes: list[BreakingChange],
        sources: list,
    ) -> Confidence:
        """Calculate overall confidence based on changes and sources.

        Args:
            changes: List of breaking changes.
            sources: List of sources used; each must expose ``source_type``.

        Returns:
            LOW when there are no changes; HIGH when any source is a
            migration guide or at least half of the changes are HIGH;
            MEDIUM when at least half are MEDIUM or HIGH; LOW otherwise.
        """
        if not changes:
            return Confidence.LOW

        # A migration guide is treated as a high confidence source on its own.
        if any(s.source_type == "migration_guide" for s in sources):
            return Confidence.HIGH

        # Count confidence levels
        high_count = sum(1 for c in changes if c.confidence == Confidence.HIGH)
        medium_count = sum(1 for c in changes if c.confidence == Confidence.MEDIUM)
        half = len(changes) / 2

        if high_count >= half:
            return Confidence.HIGH
        if medium_count + high_count >= half:
            return Confidence.MEDIUM

        return Confidence.LOW
162
+
163
+
164
# Libraries that have deterministic AST transforms ("Tier 1").
TIER_1_LIBRARIES = {"pydantic", "fastapi", "sqlalchemy", "pandas", "requests"}


def is_tier_1_library(library: str) -> bool:
    """Report whether a library belongs to Tier 1.

    The lookup is case-insensitive: the name is lower-cased before it is
    compared against ``TIER_1_LIBRARIES``.

    Args:
        library: Library name.

    Returns:
        True if the library is Tier 1, False otherwise.
    """
    normalized_name = library.lower()
    return normalized_name in TIER_1_LIBRARIES
178
+
179
+
180
async def generate_knowledge_base(
    package: str,
    old_version: str,
    new_version: str,
    progress_callback: Callable[[str], None] | None = None,
) -> GeneratedKnowledgeBase:
    """Async interface for generating knowledge base.

    ``KnowledgeGenerator.generate`` is a synchronous call, so it is executed
    in a worker thread to keep the event loop responsive while it runs.
    Progress messages are relayed back to the event loop thread, so
    ``progress_callback`` is still invoked on the loop thread, in order, and
    before this coroutine returns.

    Args:
        package: Package name.
        old_version: Starting version.
        new_version: Target version.
        progress_callback: Optional callback for progress updates.

    Returns:
        GeneratedKnowledgeBase with detected breaking changes.
    """
    import asyncio

    loop = asyncio.get_running_loop()

    def relay(msg: str) -> None:
        # Called from the worker thread; hand the message to the loop thread.
        loop.call_soon_threadsafe(progress_callback, msg)

    worker_callback = relay if progress_callback is not None else None

    generator = KnowledgeGenerator()
    return await asyncio.to_thread(
        generator.generate,
        package,
        old_version,
        new_version,
        worker_callback,
    )
199
+
200
+
201
def generate_knowledge_base_sync(
    package: str,
    old_version: str,
    new_version: str,
    progress_callback: Callable[[str], None] | None = None,
) -> GeneratedKnowledgeBase:
    """Build a knowledge base with a blocking call.

    A fresh ``KnowledgeGenerator`` with default collaborators is created for
    every invocation.

    Args:
        package: Package name.
        old_version: Starting version.
        new_version: Target version.
        progress_callback: Optional callback for progress updates.

    Returns:
        GeneratedKnowledgeBase with detected breaking changes.
    """
    return KnowledgeGenerator().generate(
        package,
        old_version,
        new_version,
        progress_callback,
    )
220
+
221
+
222
# Module-wide generator, created lazily by get_knowledge_generator().
_default_generator: KnowledgeGenerator | None = None


def get_knowledge_generator() -> KnowledgeGenerator:
    """Return the shared knowledge generator, creating it on first use."""
    global _default_generator
    if _default_generator is not None:
        return _default_generator
    _default_generator = KnowledgeGenerator()
    return _default_generator
@@ -0,0 +1,151 @@
1
+ """Data models for auto-generated knowledge bases."""
2
+
3
+ from dataclasses import dataclass, field
4
+ from enum import Enum
5
+
6
+
7
class ChangeCategory(Enum):
    """Kinds of breaking change a source can report.

    The member values are the strings used when a change is serialized.
    """

    REMOVED = "removed"
    RENAMED = "renamed"
    SIGNATURE_CHANGED = "signature_changed"
    BEHAVIOR_CHANGED = "behavior_changed"
14
+
15
+
16
class Confidence(Enum):
    """Confidence levels for detected changes.

    Members are ordered LOW < MEDIUM < HIGH and support the four ordering
    operators. Comparing against anything that is not a ``Confidence``
    returns ``NotImplemented`` so Python raises the usual ``TypeError``.
    """

    HIGH = "high"  # From migration guide or explicit changelog
    MEDIUM = "medium"  # From changelog parsing
    LOW = "low"  # From AST diff only

    def _rank(self) -> int:
        """Return this member's position in the LOW < MEDIUM < HIGH order."""
        return ("low", "medium", "high").index(self.value)

    def __ge__(self, other: object) -> bool:
        if not isinstance(other, Confidence):
            return NotImplemented
        return self._rank() >= other._rank()

    def __gt__(self, other: object) -> bool:
        if not isinstance(other, Confidence):
            return NotImplemented
        return self._rank() > other._rank()

    def __le__(self, other: object) -> bool:
        if not isinstance(other, Confidence):
            return NotImplemented
        return self._rank() <= other._rank()

    def __lt__(self, other: object) -> bool:
        if not isinstance(other, Confidence):
            return NotImplemented
        return self._rank() < other._rank()
38
+
39
+
40
@dataclass
class BreakingChange:
    """One breaking change extracted from a changelog-like source."""

    category: ChangeCategory
    old_api: str
    new_api: str | None
    description: str
    confidence: Confidence
    source: str | None = None  # Where this change was detected from

    def to_dict(self) -> dict:
        """Return a plain-dict form with enum fields replaced by their values."""
        return dict(
            category=self.category.value,
            old_api=self.old_api,
            new_api=self.new_api,
            description=self.description,
            confidence=self.confidence.value,
            source=self.source,
        )

    @classmethod
    def from_dict(cls, data: dict) -> "BreakingChange":
        """Rebuild an instance from the mapping produced by ``to_dict``.

        ``new_api`` and ``source`` are optional keys; every other key is
        required.
        """
        category = ChangeCategory(data["category"])
        old_api = data["old_api"]
        new_api = data.get("new_api")
        description = data["description"]
        confidence = Confidence(data["confidence"])
        origin = data.get("source")
        return cls(
            category=category,
            old_api=old_api,
            new_api=new_api,
            description=description,
            confidence=confidence,
            source=origin,
        )
73
+
74
+
75
+ @dataclass
76
+ class ChangelogSource:
77
+ """Represents a source of changelog information."""
78
+
79
+ url: str
80
+ source_type: str # "changelog", "migration_guide", "release_notes"
81
+ content: str
82
+ version_range: tuple[str, str] | None = None # (from_version, to_version)
83
+
84
+ def to_dict(self) -> dict:
85
+ """Convert to dictionary for serialization."""
86
+ return {
87
+ "url": self.url,
88
+ "source_type": self.source_type,
89
+ "content": self.content,
90
+ "version_range": self.version_range,
91
+ }
92
+
93
+ @classmethod
94
+ def from_dict(cls, data: dict) -> "ChangelogSource":
95
+ """Create from dictionary."""
96
+ return cls(
97
+ url=data["url"],
98
+ source_type=data["source_type"],
99
+ content=data["content"],
100
+ version_range=tuple(data["version_range"]) if data.get("version_range") else None,
101
+ )
102
+
103
+
104
@dataclass
class GeneratedKnowledgeBase:
    """Breaking changes collected for one package upgrade, plus provenance."""

    package: str
    old_version: str
    new_version: str
    breaking_changes: list[BreakingChange] = field(default_factory=list)
    sources: list[str] = field(default_factory=list)  # URLs of sources used
    overall_confidence: Confidence = Confidence.LOW

    @property
    def has_changes(self) -> bool:
        """True when at least one breaking change is recorded."""
        return len(self.breaking_changes) != 0

    def get_changes_by_confidence(self, min_confidence: Confidence) -> list[BreakingChange]:
        """Return the changes whose confidence is ``min_confidence`` or higher."""
        selected = []
        for change in self.breaking_changes:
            if change.confidence >= min_confidence:
                selected.append(change)
        return selected

    def get_changes_by_category(self, category: ChangeCategory) -> list[BreakingChange]:
        """Return the changes that belong to ``category``."""
        selected = []
        for change in self.breaking_changes:
            if change.category == category:
                selected.append(change)
        return selected

    def to_dict(self) -> dict:
        """Return a plain-dict form suitable for serialization."""
        serialized_changes = [change.to_dict() for change in self.breaking_changes]
        return dict(
            package=self.package,
            old_version=self.old_version,
            new_version=self.new_version,
            breaking_changes=serialized_changes,
            sources=self.sources,
            overall_confidence=self.overall_confidence.value,
        )

    @classmethod
    def from_dict(cls, data: dict) -> "GeneratedKnowledgeBase":
        """Rebuild an instance from the mapping produced by ``to_dict``.

        ``breaking_changes`` and ``sources`` default to empty lists and
        ``overall_confidence`` defaults to "low" when absent.
        """
        package = data["package"]
        old_version = data["old_version"]
        new_version = data["new_version"]
        changes = [BreakingChange.from_dict(raw) for raw in data.get("breaking_changes", [])]
        source_urls = data.get("sources", [])
        confidence = Confidence(data.get("overall_confidence", "low"))
        return cls(
            package=package,
            old_version=old_version,
            new_version=new_version,
            breaking_changes=changes,
            sources=source_urls,
            overall_confidence=confidence,
        )
@@ -0,0 +1,270 @@
1
+ """LLM-based changelog parser for extracting breaking changes."""
2
+
3
+ import json
4
+ import re
5
+ from typing import cast
6
+
7
+ from codeshift.knowledge.models import (
8
+ BreakingChange,
9
+ ChangeCategory,
10
+ ChangelogSource,
11
+ Confidence,
12
+ )
13
+ from codeshift.utils.llm_client import LLMClient, get_llm_client
14
+
15
+
16
class ChangelogParser:
    """Parses changelog content using LLM to extract breaking changes."""

    SYSTEM_PROMPT = """You are an expert at analyzing Python library changelogs and migration guides.
Your task is to extract breaking changes from the provided changelog content.

For each breaking change, identify:
1. category: One of "removed", "renamed", "signature_changed", "behavior_changed"
2. old_api: The old API that is affected (function name, class name, parameter, etc.)
3. new_api: The new API to use instead (if applicable, null otherwise)
4. description: A brief description of the change

Focus only on BREAKING changes that would require code modifications.
Do not include new features, bug fixes, or deprecation warnings unless they affect existing code.

Respond with a JSON array of breaking changes. Example:
[
{
"category": "renamed",
"old_api": ".dict()",
"new_api": ".model_dump()",
"description": "The .dict() method has been renamed to .model_dump()"
},
{
"category": "removed",
"old_api": "parse_obj()",
"new_api": "model_validate()",
"description": "parse_obj() has been removed, use model_validate() instead"
}
]

If there are no breaking changes, respond with an empty array: []"""

    # Maximum number of source characters sent to the LLM; longer content is
    # cut off to leave room for prompts and response.
    MAX_CONTENT_LENGTH = 15000

    def __init__(self, client: LLMClient | None = None):
        """Initialize the parser.

        Args:
            client: LLM client to use. Defaults to singleton.
        """
        self.client = client if client is not None else get_llm_client()

    @property
    def is_available(self) -> bool:
        """Check if the parser is available (LLM client configured)."""
        return self.client.is_available

    def parse_changelog(
        self,
        source: ChangelogSource,
        package: str,
        from_version: str,
        to_version: str,
    ) -> list[BreakingChange]:
        """Parse a changelog source to extract breaking changes.

        Args:
            source: The changelog source to parse.
            package: Package name.
            from_version: Starting version.
            to_version: Target version.

        Returns:
            List of detected breaking changes. Empty when the LLM is not
            available, the request fails, or the response cannot be parsed.
        """
        if not self.is_available:
            return []

        # Truncate content if too long
        content = source.content
        if len(content) > self.MAX_CONTENT_LENGTH:
            content = content[: self.MAX_CONTENT_LENGTH] + "\n\n[Content truncated...]"

        prompt = f"""Analyze the following {source.source_type} for the Python package "{package}".
Extract all breaking changes between version {from_version} and {to_version}.

{source.source_type.upper()} CONTENT:
```
{content}
```

Extract breaking changes as a JSON array:"""

        response = self.client.generate(
            prompt=prompt,
            system_prompt=self.SYSTEM_PROMPT,
            temperature=0.0,
        )

        if not response.success:
            return []

        return self._parse_response(response.content, source)

    def parse_multiple_sources(
        self,
        sources: list[ChangelogSource],
        package: str,
        from_version: str,
        to_version: str,
    ) -> list[BreakingChange]:
        """Parse multiple changelog sources and merge results.

        Changes are deduplicated by ``old_api``. The first occurrence is
        kept; when a later source reports the same ``old_api`` with a higher
        confidence, the kept entry takes over that confidence and source.

        Args:
            sources: List of changelog sources to parse.
            package: Package name.
            from_version: Starting version.
            to_version: Target version.

        Returns:
            Merged list of breaking changes (duplicates removed), in order of
            first appearance.
        """
        # Insertion-ordered dict gives O(1) duplicate lookup while preserving
        # the order in which changes were first seen.
        merged: dict[str, BreakingChange] = {}

        for source in sources:
            for change in self.parse_changelog(source, package, from_version, to_version):
                existing = merged.get(change.old_api)
                if existing is None:
                    merged[change.old_api] = change
                elif change.confidence > existing.confidence:
                    # Same change found in a better source.
                    existing.confidence = change.confidence
                    existing.source = change.source

        return list(merged.values())

    def _parse_response(
        self,
        content: str,
        source: ChangelogSource,
    ) -> list[BreakingChange]:
        """Parse LLM response into BreakingChange objects.

        Items that are not JSON objects are skipped. An unknown or missing
        category falls back to ``BEHAVIOR_CHANGED``. Text fields are
        normalised to strings because the response is untrusted and later
        code uses ``old_api`` as a dictionary key.

        Args:
            content: Raw LLM response.
            source: The source this was parsed from.

        Returns:
            List of BreakingChange objects.
        """
        json_content = self._extract_json(content)
        if not json_content:
            return []

        try:
            data = json.loads(json_content)
        except json.JSONDecodeError:
            return []

        if not isinstance(data, list):
            return []

        # Every change from one source shares that source's confidence.
        confidence = self._get_source_confidence(source.source_type)

        changes = []
        for item in data:
            if not isinstance(item, dict):
                continue

            try:
                category = ChangeCategory(item.get("category", "behavior_changed"))
            except ValueError:
                category = ChangeCategory.BEHAVIOR_CHANGED

            raw_new_api = item.get("new_api")
            changes.append(
                BreakingChange(
                    category=category,
                    old_api=self._as_text(item.get("old_api", "")),
                    new_api=None if raw_new_api is None else self._as_text(raw_new_api),
                    description=self._as_text(item.get("description", "")),
                    confidence=confidence,
                    source=source.url,
                )
            )

        return changes

    @staticmethod
    def _as_text(value: object) -> str:
        """Coerce a JSON value to ``str``, mapping ``None`` to an empty string."""
        if value is None:
            return ""
        if isinstance(value, str):
            return value
        return str(value)

    def _extract_json(self, content: str) -> str | None:
        """Extract JSON array from LLM response.

        Fenced code blocks are searched first, then the response as a whole.
        Within each, the first array that contains at least one JSON object
        wins. If no such array exists, the first array that decodes at all
        (for example ``[]``) is returned.

        Args:
            content: Raw LLM response.

        Returns:
            JSON string or None.
        """
        content = content.strip()
        if not content:
            return None

        fenced_blocks = re.findall(r"```(?:json)?\s*([\s\S]*?)```", content)

        fallback: str | None = None
        for candidate in (*fenced_blocks, content):
            found, has_objects = self._find_json_array(candidate)
            if found is None:
                continue
            if has_objects:
                return found
            if fallback is None:
                fallback = found

        return fallback

    @staticmethod
    def _find_json_array(text: str) -> tuple[str | None, bool]:
        """Locate a decodable JSON array inside ``text``.

        Decoding is attempted at every ``[`` with a real JSON decoder, so
        brackets inside string values and nested arrays are handled.

        Args:
            text: Text that may embed a JSON array.

        Returns:
            ``(array_text, True)`` for the first array holding at least one
            object, otherwise ``(first_decodable_array_or_None, False)``.
        """
        decoder = json.JSONDecoder()
        first_array: str | None = None

        start = text.find("[")
        while start != -1:
            try:
                value, end = decoder.raw_decode(text, start)
            except json.JSONDecodeError:
                pass
            else:
                if isinstance(value, list):
                    if any(isinstance(item, dict) for item in value):
                        return text[start:end], True
                    if first_array is None:
                        first_array = text[start:end]
            start = text.find("[", start + 1)

        return first_array, False

    def _get_source_confidence(self, source_type: str) -> Confidence:
        """Get confidence level based on source type.

        Args:
            source_type: Type of source.

        Returns:
            HIGH for "migration_guide" and "release_notes", MEDIUM for
            "changelog", LOW for any other source type.
        """
        confidence_map = {
            "migration_guide": Confidence.HIGH,
            "release_notes": Confidence.HIGH,
            "changelog": Confidence.MEDIUM,
        }
        return confidence_map.get(source_type, Confidence.LOW)
259
+
260
+
261
# Module-wide parser, created lazily by get_changelog_parser().
_default_parser: ChangelogParser | None = None


def get_changelog_parser() -> ChangelogParser:
    """Return the shared changelog parser, creating it on first use."""
    global _default_parser
    if _default_parser is not None:
        return _default_parser
    _default_parser = ChangelogParser()
    return _default_parser