comfygit-core 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- comfygit_core/analyzers/custom_node_scanner.py +109 -0
- comfygit_core/analyzers/git_change_parser.py +156 -0
- comfygit_core/analyzers/model_scanner.py +318 -0
- comfygit_core/analyzers/node_classifier.py +58 -0
- comfygit_core/analyzers/node_git_analyzer.py +77 -0
- comfygit_core/analyzers/status_scanner.py +362 -0
- comfygit_core/analyzers/workflow_dependency_parser.py +143 -0
- comfygit_core/caching/__init__.py +16 -0
- comfygit_core/caching/api_cache.py +210 -0
- comfygit_core/caching/base.py +212 -0
- comfygit_core/caching/comfyui_cache.py +100 -0
- comfygit_core/caching/custom_node_cache.py +320 -0
- comfygit_core/caching/workflow_cache.py +797 -0
- comfygit_core/clients/__init__.py +4 -0
- comfygit_core/clients/civitai_client.py +412 -0
- comfygit_core/clients/github_client.py +349 -0
- comfygit_core/clients/registry_client.py +230 -0
- comfygit_core/configs/comfyui_builtin_nodes.py +1614 -0
- comfygit_core/configs/comfyui_models.py +62 -0
- comfygit_core/configs/model_config.py +151 -0
- comfygit_core/constants.py +82 -0
- comfygit_core/core/environment.py +1635 -0
- comfygit_core/core/workspace.py +898 -0
- comfygit_core/factories/environment_factory.py +419 -0
- comfygit_core/factories/uv_factory.py +61 -0
- comfygit_core/factories/workspace_factory.py +109 -0
- comfygit_core/infrastructure/sqlite_manager.py +156 -0
- comfygit_core/integrations/__init__.py +7 -0
- comfygit_core/integrations/uv_command.py +318 -0
- comfygit_core/logging/logging_config.py +15 -0
- comfygit_core/managers/environment_git_orchestrator.py +316 -0
- comfygit_core/managers/environment_model_manager.py +296 -0
- comfygit_core/managers/export_import_manager.py +116 -0
- comfygit_core/managers/git_manager.py +667 -0
- comfygit_core/managers/model_download_manager.py +252 -0
- comfygit_core/managers/model_symlink_manager.py +166 -0
- comfygit_core/managers/node_manager.py +1378 -0
- comfygit_core/managers/pyproject_manager.py +1321 -0
- comfygit_core/managers/user_content_symlink_manager.py +436 -0
- comfygit_core/managers/uv_project_manager.py +569 -0
- comfygit_core/managers/workflow_manager.py +1944 -0
- comfygit_core/models/civitai.py +432 -0
- comfygit_core/models/commit.py +18 -0
- comfygit_core/models/environment.py +293 -0
- comfygit_core/models/exceptions.py +378 -0
- comfygit_core/models/manifest.py +132 -0
- comfygit_core/models/node_mapping.py +201 -0
- comfygit_core/models/protocols.py +248 -0
- comfygit_core/models/registry.py +63 -0
- comfygit_core/models/shared.py +356 -0
- comfygit_core/models/sync.py +42 -0
- comfygit_core/models/system.py +204 -0
- comfygit_core/models/workflow.py +914 -0
- comfygit_core/models/workspace_config.py +71 -0
- comfygit_core/py.typed +0 -0
- comfygit_core/repositories/migrate_paths.py +49 -0
- comfygit_core/repositories/model_repository.py +958 -0
- comfygit_core/repositories/node_mappings_repository.py +246 -0
- comfygit_core/repositories/workflow_repository.py +57 -0
- comfygit_core/repositories/workspace_config_repository.py +121 -0
- comfygit_core/resolvers/global_node_resolver.py +459 -0
- comfygit_core/resolvers/model_resolver.py +250 -0
- comfygit_core/services/import_analyzer.py +218 -0
- comfygit_core/services/model_downloader.py +422 -0
- comfygit_core/services/node_lookup_service.py +251 -0
- comfygit_core/services/registry_data_manager.py +161 -0
- comfygit_core/strategies/__init__.py +4 -0
- comfygit_core/strategies/auto.py +72 -0
- comfygit_core/strategies/confirmation.py +69 -0
- comfygit_core/utils/comfyui_ops.py +125 -0
- comfygit_core/utils/common.py +164 -0
- comfygit_core/utils/conflict_parser.py +232 -0
- comfygit_core/utils/dependency_parser.py +231 -0
- comfygit_core/utils/download.py +216 -0
- comfygit_core/utils/environment_cleanup.py +111 -0
- comfygit_core/utils/filesystem.py +178 -0
- comfygit_core/utils/git.py +1184 -0
- comfygit_core/utils/input_signature.py +145 -0
- comfygit_core/utils/model_categories.py +52 -0
- comfygit_core/utils/pytorch.py +71 -0
- comfygit_core/utils/requirements.py +211 -0
- comfygit_core/utils/retry.py +242 -0
- comfygit_core/utils/symlink_utils.py +119 -0
- comfygit_core/utils/system_detector.py +258 -0
- comfygit_core/utils/uuid.py +28 -0
- comfygit_core/utils/uv_error_handler.py +158 -0
- comfygit_core/utils/version.py +73 -0
- comfygit_core/utils/workflow_hash.py +90 -0
- comfygit_core/validation/resolution_tester.py +297 -0
- comfygit_core-0.2.0.dist-info/METADATA +939 -0
- comfygit_core-0.2.0.dist-info/RECORD +93 -0
- comfygit_core-0.2.0.dist-info/WHEEL +4 -0
- comfygit_core-0.2.0.dist-info/licenses/LICENSE.txt +661 -0
|
@@ -0,0 +1,459 @@
|
|
|
1
|
+
"""Global node resolver using prebuilt mappings."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import re
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
from typing import TYPE_CHECKING, List
|
|
8
|
+
|
|
9
|
+
from comfygit_core.models.workflow import (
|
|
10
|
+
WorkflowNode,
|
|
11
|
+
ResolvedNodePackage,
|
|
12
|
+
NodeResolutionContext,
|
|
13
|
+
ScoredPackageMatch,
|
|
14
|
+
)
|
|
15
|
+
|
|
16
|
+
from ..logging.logging_config import get_logger
|
|
17
|
+
from ..repositories.node_mappings_repository import NodeMappingsRepository
|
|
18
|
+
from ..utils.input_signature import create_node_key, normalize_workflow_inputs
|
|
19
|
+
|
|
20
|
+
logger = get_logger(__name__)
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class GlobalNodeResolver:
|
|
24
|
+
"""Resolves unknown nodes using global mappings repository.
|
|
25
|
+
|
|
26
|
+
This class is responsible for resolution logic only - data access
|
|
27
|
+
is delegated to NodeMappingsRepository.
|
|
28
|
+
"""
|
|
29
|
+
|
|
30
|
+
def __init__(self, repository: NodeMappingsRepository):
|
|
31
|
+
"""Initialize resolver with repository.
|
|
32
|
+
|
|
33
|
+
Args:
|
|
34
|
+
repository: NodeMappingsRepository for data access
|
|
35
|
+
"""
|
|
36
|
+
self.repository = repository
|
|
37
|
+
|
|
38
|
+
# Convenience properties for backward compatibility
|
|
39
|
+
@property
def global_mappings(self):
    """Return the repository's ``global_mappings`` object unchanged.

    Kept as a convenience accessor for backward compatibility.
    """
    source = self.repository
    return source.global_mappings
|
|
43
|
+
|
|
44
|
+
def resolve_github_url(self, github_url: str):
    """Resolve a GitHub URL to a registry package via the repository.

    Args:
        github_url: GitHub URL to look up

    Returns:
        Whatever ``repository.resolve_github_url`` returns for the URL
    """
    lookup = self.repository.resolve_github_url
    return lookup(github_url)
|
|
47
|
+
|
|
48
|
+
def get_github_url_for_package(self, package_id: str) -> str | None:
    """Fetch the GitHub URL recorded for a package ID.

    Args:
        package_id: Package ID to look up

    Returns:
        The value produced by ``repository.get_github_url_for_package``
    """
    lookup = self.repository.get_github_url_for_package
    return lookup(package_id)
|
|
51
|
+
|
|
52
|
+
def resolve_single_node_from_mapping(self, node: WorkflowNode) -> List[ResolvedNodePackage] | None:
    """Look a node up in the global mapping table.

    Two keys are tried in order: an exact key built from the node type and
    its normalized input signature, then a type-only key built with the
    "_" placeholder signature. The first key present in the table decides
    the outcome - if that key's package list is empty the node counts as
    not found and no further key is tried.

    Args:
        node: Workflow node whose ``type`` and ``inputs`` drive the lookup

    Returns:
        All packages listed for the matched key, ordered by ascending rank
        ("exact" matches carry confidence 1.0, "type_only" matches 0.9),
        or None when nothing matched
    """
    mapping_table = self.repository.global_mappings.mappings
    package_table = self.repository.global_mappings.packages

    node_type = node.type
    node_inputs = node.inputs

    def build_ranked(entry, match_type, confidence):
        """Convert one mapping entry into rank-ordered resolved packages."""
        # Empty packages list = not found
        if not entry.packages:
            return None
        ordered = sorted(entry.packages, key=lambda item: item.rank)
        return [
            ResolvedNodePackage(
                package_id=item.package_id,
                package_data=package_table.get(item.package_id),
                node_type=node_type,
                versions=item.versions,
                match_type=match_type,
                match_confidence=confidence,
                rank=item.rank
            )
            for item in ordered
        ]

    # Strategy 1: exact match on node type + input signature
    if node_inputs:
        signature = normalize_workflow_inputs(node_inputs)
        logger.debug(f"Input signature for {node_type}: {signature}")
        if signature:
            exact_key = create_node_key(node_type, signature)
            logger.debug(f"Exact key for {node_type}: {exact_key}")
            if exact_key in mapping_table:
                entry = mapping_table[exact_key]
                logger.debug(f"Exact match for {node_type}: {len(entry.packages)} package(s)")
                return build_ranked(entry, "exact", 1.0)

    # Strategy 2: match on node type alone
    type_only_key = create_node_key(node_type, "_")
    if type_only_key not in mapping_table:
        logger.debug(f"No match found for {node_type}")
        return None

    entry = mapping_table[type_only_key]
    logger.debug(f"Type-only match for {node_type}: {len(entry.packages)} package(s)")
    return build_ranked(entry, "type_only", 0.9)
|
|
121
|
+
|
|
122
|
+
def resolve_single_node_with_context(
    self,
    node: WorkflowNode,
    context: NodeResolutionContext | None = None
) -> List[ResolvedNodePackage] | None:
    """Resolve a node, checking user and workflow hints before the global table.

    Sources are consulted in this order; the first that produces a result wins:
        1. ``context.custom_mappings`` entry for the node type
        2. ``cnr_id`` found in the node's properties, when the repository
           knows that package
        3. Global mapping table (resolve_single_node_from_mapping)
        4. Nothing matched - None is returned so the caller can fall back
           to interactive resolution

    Args:
        node: WorkflowNode to resolve
        context: Optional resolution context supplying custom mappings,
            installed packages and the auto-select flag

    Returns:
        List of resolved packages, or None when unresolved. A custom
        mapping whose value is a bool yields one package flagged
        ``is_optional=True``.
    """
    node_type = node.type

    # Priority 1: Custom mappings
    if context and node_type in context.custom_mappings:
        custom = context.custom_mappings[node_type]
        if isinstance(custom, bool):  # Node marked as optional
            logger.debug(f"Found optional {node_type} (user-configured optional)")
            optional_pkg = ResolvedNodePackage(
                node_type=node_type,
                is_optional=True,
                match_type="custom_mapping"
            )
            return [optional_pkg]
        assert isinstance(custom, str)  # Should be Package ID
        logger.debug(f"Custom mapping for {node_type}: {custom}")
        return [self._create_resolved_package_from_id(custom, node_type, "custom_mapping")]

    # Priority 2: Properties field (cnr_id from ComfyUI)
    props = node.properties
    if props:
        cnr_id = props.get('cnr_id')
        ver = props.get('ver')  # Git commit hash

        if cnr_id:
            logger.debug(f"Found cnr_id in properties: {cnr_id} @ {ver}")

            # Only trust the cnr_id when the repository knows the package
            registry_entry = self.repository.get_package(cnr_id)
            if not registry_entry:
                logger.warning(f"cnr_id {cnr_id} from properties not in registry")
            else:
                return [ResolvedNodePackage(
                    package_id=cnr_id,
                    package_data=registry_entry,
                    node_type=node_type,
                    versions=[ver] if ver else [],
                    match_type="properties",
                    match_confidence=1.0
                )]

    # Priority 3: Global table
    candidates = self.resolve_single_node_from_mapping(node)
    if not candidates:
        # Priority 4: No match - return None to trigger interactive strategy with unified search
        logger.debug(f"No resolution found for {node_type} - will use interactive strategy")
        return None

    # Collapse ambiguous results to one package only when the context asks for it
    if context and context.auto_select_ambiguous and len(candidates) > 1:
        return [self._auto_select_best_package(candidates, context.installed_packages)]
    return candidates
|
|
197
|
+
|
|
198
|
+
def _auto_select_best_package(
    self,
    packages: List[ResolvedNodePackage],
    installed_packages: dict
) -> ResolvedNodePackage:
    """Pick one package out of several ranked candidates.

    Candidates whose ``package_id`` is a key of ``installed_packages`` are
    preferred; among the preferred set (or among all candidates when none
    is installed) the lowest rank wins. A falsy rank (None or 0) is
    treated as 999.

    Args:
        packages: Ranked candidate packages
        installed_packages: Mapping keyed by installed package ID

    Returns:
        The single selected package
    """
    def rank_key(candidate):
        # Candidates without a usable rank sort behind every ranked one
        return candidate.rank or 999

    already_installed = [
        candidate for candidate in packages
        if candidate.package_id in installed_packages
    ]

    if not already_installed:
        # Nothing installed - fall back to the best-ranked candidate overall
        winner = min(packages, key=rank_key)
        logger.debug(
            f"Auto-selected {winner.package_id} (rank {winner.rank}, most popular) "
            f"from {len(packages)} option(s)"
        )
        return winner

    # Best-ranked candidate among those already installed
    winner = min(already_installed, key=rank_key)
    logger.debug(
        f"Auto-selected {winner.package_id} (rank {winner.rank}, installed) "
        f"over {len(packages)-1} other option(s)"
    )
    return winner
|
|
238
|
+
|
|
239
|
+
def _create_resolved_package_from_id(
    self,
    pkg_id: str,
    node_type: str,
    match_type: str
) -> ResolvedNodePackage:
    """Build a ResolvedNodePackage for a known package ID.

    Package data is fetched from the repository; the result carries no
    versions and a fixed match confidence of 1.0.

    Args:
        pkg_id: Package ID to wrap
        node_type: Node type being resolved
        match_type: Label recorded on the result (e.g. "custom_mapping")

    Returns:
        ResolvedNodePackage instance
    """
    return ResolvedNodePackage(
        package_id=pkg_id,
        package_data=self.repository.get_package(pkg_id),
        node_type=node_type,
        versions=[],
        match_type=match_type,
        match_confidence=1.0
    )
|
|
265
|
+
|
|
266
|
+
def search_packages(
    self,
    node_type: str,
    installed_packages: dict | None = None,
    include_registry: bool = True,
    limit: int = 10
) -> List[ScoredPackageMatch]:
    """Unified search with heuristic boosting.

    Builds a candidate pool from installed packages (and, optionally, the
    full registry), scores every candidate with _calculate_match_score and
    keeps those scoring above 0.3.

    Args:
        node_type: Node type to search for; an empty value returns []
        installed_packages: Mapping keyed by installed package ID; these
            candidates are flagged as installed when scored. None (the
            default) means nothing is installed.
        include_registry: Also add every package from
            ``repository.get_all_packages()`` to the candidate pool
        limit: Maximum number of results (applied as a slice)

    Returns:
        Scored matches sorted by (score, GitHub stars), highest first
    """
    if not node_type:
        return []

    # The default is None rather than a shared ``{}`` literal, so no dict
    # object is reused across calls and an explicit None is accepted too.
    if installed_packages is None:
        installed_packages = {}

    node_type_lower = node_type.lower()

    # Candidate pool: package_id -> (package_data, is_installed)
    candidates = {}

    # Phase 1: Installed packages (always checked first); IDs the
    # repository does not know are skipped.
    for pkg_id in installed_packages:
        pkg_data = self.repository.get_package(pkg_id)
        if pkg_data:
            candidates[pkg_id] = (pkg_data, True)

    # Phase 2: Registry packages - never overwrite an installed entry
    if include_registry:
        for pkg_id, pkg_data in self.repository.get_all_packages().items():
            if pkg_id not in candidates:
                candidates[pkg_id] = (pkg_data, False)

    # Score each candidate
    scored = []
    for pkg_id, (pkg_data, is_installed) in candidates.items():
        score = self._calculate_match_score(
            node_type=node_type,
            node_type_lower=node_type_lower,
            pkg_id=pkg_id,
            pkg_data=pkg_data,
            is_installed=is_installed
        )

        if score > 0.3:  # Minimum threshold
            scored.append(ScoredPackageMatch(
                package_id=pkg_id,
                package_data=pkg_data,
                score=score,
                confidence=self._score_to_confidence(score)
            ))

    # Sort by (score, stars) descending - stars act as tiebreaker for equal scores
    scored.sort(key=lambda x: (x.score, x.package_data.github_stars or 0), reverse=True)
    return scored[:limit]
|
|
332
|
+
|
|
333
|
+
def _calculate_match_score(
|
|
334
|
+
self,
|
|
335
|
+
node_type: str,
|
|
336
|
+
node_type_lower: str,
|
|
337
|
+
pkg_id: str,
|
|
338
|
+
pkg_data,
|
|
339
|
+
is_installed: bool
|
|
340
|
+
) -> float:
|
|
341
|
+
"""Calculate comprehensive match score with bonuses.
|
|
342
|
+
|
|
343
|
+
Scoring pipeline:
|
|
344
|
+
1. Base fuzzy score (SequenceMatcher)
|
|
345
|
+
2. Keyword overlap bonus
|
|
346
|
+
3. Hint pattern bonuses (heuristics!)
|
|
347
|
+
4. Installed package bonus
|
|
348
|
+
5. Popularity bonus (GitHub stars on log scale)
|
|
349
|
+
"""
|
|
350
|
+
from difflib import SequenceMatcher
|
|
351
|
+
|
|
352
|
+
pkg_id_lower = pkg_id.lower()
|
|
353
|
+
|
|
354
|
+
# 1. Base fuzzy score (ID and display name only)
|
|
355
|
+
base_score = SequenceMatcher(None, node_type_lower, pkg_id_lower).ratio()
|
|
356
|
+
|
|
357
|
+
# Also check display name
|
|
358
|
+
if pkg_data.display_name:
|
|
359
|
+
name_score = SequenceMatcher(
|
|
360
|
+
None, node_type_lower, pkg_data.display_name.lower()
|
|
361
|
+
).ratio()
|
|
362
|
+
base_score = max(base_score, name_score)
|
|
363
|
+
|
|
364
|
+
# 2. Keyword overlap bonus (ID, display name, AND description for better recall)
|
|
365
|
+
# Split on underscores, hyphens, and whitespace to extract individual keywords
|
|
366
|
+
node_keywords = set(re.findall(r'[a-z0-9]+', node_type_lower))
|
|
367
|
+
pkg_keywords = set(re.findall(r'[a-z0-9]+', pkg_id_lower))
|
|
368
|
+
if pkg_data.display_name:
|
|
369
|
+
pkg_keywords.update(re.findall(r'[a-z0-9]+', pkg_data.display_name.lower()))
|
|
370
|
+
|
|
371
|
+
# Add description keywords but with limited weight
|
|
372
|
+
desc_keywords = set()
|
|
373
|
+
if pkg_data.description:
|
|
374
|
+
desc_keywords = set(re.findall(r'[a-z0-9]+', pkg_data.description.lower()))
|
|
375
|
+
|
|
376
|
+
# Calculate overlap for ID/name vs description separately
|
|
377
|
+
id_overlap = len(node_keywords & pkg_keywords) / max(len(node_keywords), 1)
|
|
378
|
+
desc_overlap = len(node_keywords & desc_keywords) / max(len(node_keywords), 1)
|
|
379
|
+
|
|
380
|
+
# Combine with weighted importance:
|
|
381
|
+
# - ID/name match is primary (0.50 max bonus - increased to dominate over fuzzy)
|
|
382
|
+
# - Description match is secondary boost (0.15 max bonus)
|
|
383
|
+
keyword_bonus = (id_overlap * 0.50) + (desc_overlap * 0.15)
|
|
384
|
+
|
|
385
|
+
# 3. Hint pattern bonuses (THE HEURISTICS!)
|
|
386
|
+
hint_bonus = self._detect_hint_patterns(node_type, pkg_id_lower)
|
|
387
|
+
|
|
388
|
+
# 4. Installed package bonus
|
|
389
|
+
installed_bonus = 0.10 if is_installed else 0.0
|
|
390
|
+
|
|
391
|
+
# 5. Popularity bonus (log scale to prevent overwhelming text relevance)
|
|
392
|
+
# 10 stars → 0.01, 100 stars → 0.02, 1000 stars → 0.03, 10000 stars → 0.04
|
|
393
|
+
import math
|
|
394
|
+
popularity_bonus = 0.0
|
|
395
|
+
if pkg_data.github_stars and pkg_data.github_stars > 0:
|
|
396
|
+
popularity_bonus = math.log10(pkg_data.github_stars) * 0.1
|
|
397
|
+
|
|
398
|
+
# Combine - don't cap at 1.0 so popularity can differentiate high-scoring packages
|
|
399
|
+
final_score = base_score + keyword_bonus + hint_bonus + installed_bonus + popularity_bonus
|
|
400
|
+
return final_score
|
|
401
|
+
|
|
402
|
+
def _detect_hint_patterns(
|
|
403
|
+
self,
|
|
404
|
+
node_type: str,
|
|
405
|
+
pkg_id_lower: str
|
|
406
|
+
) -> float:
|
|
407
|
+
"""Detect hint patterns and return bonus score.
|
|
408
|
+
|
|
409
|
+
This is where heuristics live - as score boosters!
|
|
410
|
+
These bonuses are now more conservative to prevent score inflation.
|
|
411
|
+
"""
|
|
412
|
+
max_bonus = 0.0
|
|
413
|
+
|
|
414
|
+
# Pattern 1: Parenthetical/Bracket hint (STRONG signal)
|
|
415
|
+
# "Node Name (package)" → "package" OR "Node Name [package]" → "package"
|
|
416
|
+
for open_char, close_char in [("(", ")"), ("[", "]")]:
|
|
417
|
+
if open_char in node_type and close_char in node_type:
|
|
418
|
+
hint = node_type.split(open_char)[-1].rstrip(close_char).strip().lower()
|
|
419
|
+
if len(hint) >= 3: # Minimum length to avoid false positives
|
|
420
|
+
if hint == pkg_id_lower:
|
|
421
|
+
max_bonus = max(max_bonus, 0.50) # Exact match
|
|
422
|
+
elif hint in pkg_id_lower:
|
|
423
|
+
max_bonus = max(max_bonus, 0.40) # Substring match
|
|
424
|
+
|
|
425
|
+
# Pattern 2: Pipe separator
|
|
426
|
+
# "Node Name | PackageName" → "PackageName"
|
|
427
|
+
if "|" in node_type:
|
|
428
|
+
parts = node_type.split("|")
|
|
429
|
+
if len(parts) == 2:
|
|
430
|
+
hint = parts[1].strip().lower()
|
|
431
|
+
if hint in pkg_id_lower:
|
|
432
|
+
max_bonus = max(max_bonus, 0.35) # Reduced from 0.55
|
|
433
|
+
|
|
434
|
+
# Pattern 3: Dash/Colon separator
|
|
435
|
+
# "Node Name - Package" or "Node: Package"
|
|
436
|
+
for sep in [" - ", ": "]:
|
|
437
|
+
if sep in node_type:
|
|
438
|
+
parts = node_type.split(sep)
|
|
439
|
+
if len(parts) >= 2:
|
|
440
|
+
hint = parts[-1].strip().lower()
|
|
441
|
+
if len(hint) >= 3 and hint in pkg_id_lower:
|
|
442
|
+
max_bonus = max(max_bonus, 0.30) # Reduced from 0.50
|
|
443
|
+
break
|
|
444
|
+
|
|
445
|
+
# Pattern 4: Fragment match (weakest) - removed to reduce noise
|
|
446
|
+
# This was adding too many false positives
|
|
447
|
+
|
|
448
|
+
return max_bonus
|
|
449
|
+
|
|
450
|
+
def _score_to_confidence(self, score: float) -> str:
|
|
451
|
+
"""Convert numeric score to confidence label."""
|
|
452
|
+
if score >= 0.85:
|
|
453
|
+
return "high"
|
|
454
|
+
elif score >= 0.65:
|
|
455
|
+
return "good"
|
|
456
|
+
elif score >= 0.45:
|
|
457
|
+
return "possible"
|
|
458
|
+
else:
|
|
459
|
+
return "low"
|