agent-api-server 2.1.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agent_api_server/__init__.py +0 -0
- agent_api_server/api/__init__.py +0 -0
- agent_api_server/api/v1/__init__.py +0 -0
- agent_api_server/api/v1/api.py +25 -0
- agent_api_server/api/v1/config.py +57 -0
- agent_api_server/api/v1/graph.py +59 -0
- agent_api_server/api/v1/schema.py +57 -0
- agent_api_server/api/v1/thread.py +563 -0
- agent_api_server/cache/__init__.py +0 -0
- agent_api_server/cache/redis_cache.py +385 -0
- agent_api_server/callback_handler.py +18 -0
- agent_api_server/client/css/styles.css +1202 -0
- agent_api_server/client/favicon.ico +0 -0
- agent_api_server/client/index.html +102 -0
- agent_api_server/client/js/app.js +1499 -0
- agent_api_server/client/js/index.umd.js +824 -0
- agent_api_server/config_center/config_center.py +239 -0
- agent_api_server/configs/__init__.py +3 -0
- agent_api_server/configs/config.py +163 -0
- agent_api_server/dynamic_llm/__init__.py +0 -0
- agent_api_server/dynamic_llm/dynamic_llm.py +331 -0
- agent_api_server/listener.py +530 -0
- agent_api_server/log/__init__.py +0 -0
- agent_api_server/log/formatters.py +122 -0
- agent_api_server/log/logging.json +50 -0
- agent_api_server/mcp_convert/__init__.py +0 -0
- agent_api_server/mcp_convert/mcp_convert.py +375 -0
- agent_api_server/memeory/__init__.py +0 -0
- agent_api_server/memeory/postgres.py +233 -0
- agent_api_server/register/__init__.py +0 -0
- agent_api_server/register/register.py +65 -0
- agent_api_server/service.py +354 -0
- agent_api_server/service_hub/service_hub.py +233 -0
- agent_api_server/service_hub/service_hub_test.py +700 -0
- agent_api_server/shared/__init__.py +0 -0
- agent_api_server/shared/ase.py +54 -0
- agent_api_server/shared/base_model.py +103 -0
- agent_api_server/shared/common.py +110 -0
- agent_api_server/shared/decode_token.py +107 -0
- agent_api_server/shared/detect_message.py +410 -0
- agent_api_server/shared/get_model_info.py +491 -0
- agent_api_server/shared/message.py +419 -0
- agent_api_server/shared/util_func.py +372 -0
- agent_api_server/sso_service/__init__.py +1 -0
- agent_api_server/sso_service/sdk/__init__.py +1 -0
- agent_api_server/sso_service/sdk/client.py +224 -0
- agent_api_server/sso_service/sdk/credential.py +11 -0
- agent_api_server/sso_service/sdk/encoding.py +22 -0
- agent_api_server/sso_service/sso_service.py +177 -0
- agent_api_server-2.1.7.dist-info/METADATA +130 -0
- agent_api_server-2.1.7.dist-info/RECORD +52 -0
- agent_api_server-2.1.7.dist-info/WHEEL +4 -0
|
@@ -0,0 +1,410 @@
|
|
|
1
|
+
from typing import Literal, List, Tuple, Dict, Set
|
|
2
|
+
import re
|
|
3
|
+
import json
|
|
4
|
+
from dataclasses import dataclass
|
|
5
|
+
from collections import defaultdict
|
|
6
|
+
|
|
7
|
+
ContentType = Literal["markdown", "json", "html", "xml", "python", "yaml", "text"]
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
@dataclass
class ContentTypeDetector:
    """Registry entry that pairs a content-type label with its scoring function."""

    # Label returned by detect_content_type when this detector wins.
    name: ContentType
    # Scoring function; detect_content_type calls it with the stripped text
    # and compares the result against 0 and against confidence_threshold.
    detector: callable
    # Detectors are evaluated in descending priority order.
    priority: int = 0
    # Minimum score for this type to be considered a valid candidate.
    confidence_threshold: float = 0.7
    # When True, reaching the threshold ends detection immediately.
    exclusive: bool = False
|
|
17
|
+
|
|
18
|
+
def detect_content_type(content: str) -> ContentType:
    """Classify *content* as one of the known content types.

    Every registered detector scores the stripped text. An exclusive detector
    that reaches its threshold wins immediately. Otherwise the scores are
    adjusted, filtered by each detector's threshold, and the best candidate
    (then the runner-up) is returned if it passes verification. "text" is
    the fallback for empty input and for every failure path.
    """
    text = content.strip()
    if not text:
        return "text"

    ordered = sorted(CONTENT_TYPE_DETECTORS, key=lambda d: (-d.priority, d.name))

    scored: List[Tuple[ContentType, float]] = []
    for entry in ordered:
        try:
            confidence = entry.detector(text)
            if confidence > 0:
                scored.append((entry.name, confidence))
                # An exclusive detector that reaches its threshold short-circuits.
                if entry.exclusive and confidence >= entry.confidence_threshold:
                    return entry.name
        except Exception:
            # A failing detector is skipped; the remaining ones still run.
            continue

    if not scored:
        return "text"

    adjusted = _adjust_confidences(text, scored)

    # First occurrence wins, mirroring a front-to-back scan of the ordered list.
    threshold_of = {}
    position_of = {}
    for position, entry in enumerate(ordered):
        threshold_of.setdefault(entry.name, entry.confidence_threshold)
        position_of.setdefault(entry.name, position)

    qualifying = [
        (name, confidence)
        for name, confidence in adjusted
        if confidence >= threshold_of[name]
    ]
    if not qualifying:
        return "text"

    # Highest confidence first; ties go to the detector evaluated earlier.
    ranked = sorted(
        qualifying,
        key=lambda item: (item[1], -position_of[item[0]]),
        reverse=True,
    )

    # Only the best candidate and the runner-up are given a verification attempt.
    for name, _ in ranked[:2]:
        if _verify_content_type(text, name):
            return name

    return "text"
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def _adjust_confidences(content: str, results: List[Tuple[ContentType, float]]) -> List[Tuple[ContentType, float]]:
    """Apply content-specific adjustments to confidence scores.

    Args:
        content: The text being classified.
        results: ``(content type, confidence)`` pairs from the detectors.

    Returns:
        A list of ``(content type, adjusted confidence)`` pairs, one per
        distinct content type in ``results``.
    """
    type_scores = dict(results)

    # Penalize HTML if it looks like template syntax
    template_markers = [r'\{\{.*\}\}', r'{%.*%}', r'<\?php']
    if "html" in type_scores and any(re.search(p, content) for p in template_markers):
        type_scores["html"] *= 0.7

    # Boost JSON if it's the only structured format detected
    if "json" in type_scores and len(type_scores) == 1:
        type_scores["json"] = min(1.0, type_scores["json"] * 1.1)

    # Penalize YAML if it looks like plain text with colons
    # NOTE(review): these patterns are searched without re.MULTILINE, so `^`
    # only anchors at the very start of the content - confirm that structure
    # further down the document is meant to be ignored here.
    yaml_structure = [r'^\s*-\s+', r'^\s*\w+:\s*\n\s+', r'^---\s*$']
    if "yaml" in type_scores and not any(re.search(p, content) for p in yaml_structure):
        type_scores["yaml"] *= 0.6

    # Penalize Markdown if fewer than 20% of its lines carry Markdown syntax
    if "markdown" in type_scores:
        lines = content.splitlines()
        md_lines = sum(
            1 for line in lines
            if any(re.search(p, line) for p in MD_PATTERNS)
        )
        # Content without any lines has no Markdown lines either; treating it
        # as below the ratio avoids dividing by zero.
        if not lines or md_lines / len(lines) < 0.2:
            type_scores["markdown"] *= 0.5

    return list(type_scores.items())
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
def _verify_content_type(content: str, content_type: ContentType) -> bool:
    """Perform additional verification for the detected content type.

    JSON must parse; Python, HTML and XML are delegated to their dedicated
    verifiers. Every other content type is accepted without further checks.
    """
    if content_type == "json":
        try:
            json.loads(content)
        except (ValueError, TypeError, RecursionError):
            # Malformed (or pathologically nested) input is simply "not JSON";
            # interpreter-level signals such as KeyboardInterrupt propagate.
            return False
        return True
    if content_type == "python":
        return _verify_python(content)
    if content_type == "html":
        return _verify_html(content)
    if content_type == "xml":
        return _verify_xml(content)
    return True
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
def _verify_python(content: str) -> bool:
|
|
117
|
+
"""Additional verification for Python code."""
|
|
118
|
+
lines = [line for line in content.splitlines() if line.strip()]
|
|
119
|
+
if not lines:
|
|
120
|
+
return False
|
|
121
|
+
|
|
122
|
+
# Check for valid Python syntax structures
|
|
123
|
+
def_count = len(re.findall(r'^\s*def\s+\w+\s*$', content, re.MULTILINE))
|
|
124
|
+
class_count = len(re.findall(r'^\s*class\s+\w+', content, re.MULTILINE))
|
|
125
|
+
import_count = len(re.findall(r'^\s*import\s+\w+', content, re.MULTILINE))
|
|
126
|
+
|
|
127
|
+
return (def_count + class_count + import_count) >= 1
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
def _verify_html(content: str) -> bool:
|
|
131
|
+
"""Additional verification for HTML."""
|
|
132
|
+
open_tags = re.findall(r'<([a-z]+)[^>]*>', content.lower())
|
|
133
|
+
close_tags = re.findall(r'</([a-z]+)>', content.lower())
|
|
134
|
+
|
|
135
|
+
if not open_tags and not close_tags:
|
|
136
|
+
return False
|
|
137
|
+
|
|
138
|
+
# Check if most opened tags are closed
|
|
139
|
+
tag_counts = defaultdict(int)
|
|
140
|
+
for tag in open_tags:
|
|
141
|
+
tag_counts[tag] += 1
|
|
142
|
+
for tag in close_tags:
|
|
143
|
+
tag_counts[tag] -= 1
|
|
144
|
+
|
|
145
|
+
properly_closed = sum(1 for count in tag_counts.values() if count <= 0)
|
|
146
|
+
return properly_closed / len(tag_counts) > 0.5 if tag_counts else False
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
def _verify_xml(content: str) -> bool:
|
|
150
|
+
"""Additional verification for XML."""
|
|
151
|
+
if '<?xml version=' in content[:100].lower():
|
|
152
|
+
return True
|
|
153
|
+
|
|
154
|
+
# Check for balanced tags
|
|
155
|
+
tags = re.findall(r'<(/?)([a-z]+)[^>]*>', content.lower())
|
|
156
|
+
stack = []
|
|
157
|
+
for is_close, tag in tags:
|
|
158
|
+
if not is_close:
|
|
159
|
+
stack.append(tag)
|
|
160
|
+
else:
|
|
161
|
+
if not stack or stack[-1] != tag:
|
|
162
|
+
return False
|
|
163
|
+
stack.pop()
|
|
164
|
+
return not stack
|
|
165
|
+
|
|
166
|
+
|
|
167
|
+
# Predefined patterns for various content types
|
|
168
|
+
# Line-level Markdown syntax markers.
MD_PATTERNS = [
    r'^#+\s',               # headers
    r'^[-*]\s',             # unordered list items
    r'^\d+\.\s',            # ordered list items
    r'^>\s',                # blockquotes
    r'\[.*\]\(.*\)',        # links: [text](target)
    r'\*\*.*\*\*|__.*__',   # bold
    r'\*[^*]+\*|_[^_]+_',   # italic
    r'^```',                # fenced code blocks
    r'^\|.*\|.*\|$',        # table rows
    r'^---$|^===',          # horizontal rules / setext underlines
    r'^`[^`]+`',            # inline code at the start of a line
    r'!\[.*\]\(.*\)',       # images: ![alt](target)
]

# Substrings whose presence in a line counts as a Python keyword hit.
PYTHON_KEYWORDS = {
    'def ', 'class ', 'import ', 'from ', 'try:', 'except:',
    'if ', 'else:', 'elif ', 'for ', 'while ', 'with ',
    'return ', 'yield ', 'async ', 'await ', 'raise ', 'lambda ',
    'nonlocal ', 'global ', 'pass ', 'break ', 'continue ',
}

# Regexes for line shapes that are typical of Python source.
PYTHON_CONSTRUCTS = [
    r'^\s*@[\w\.]+',            # decorators
    r'^\s*def\s+\w+\s*\(',      # function definitions
    r'^\s*class\s+\w+',         # class definitions
    r'^\s*[\w_]+\s*=',          # assignments
    r'^\s*[\w_]+\s*:\s*\w+',    # annotated names
    r'^\s*[\w_]+\s*=[^=]',      # assignments (excluding ==)
    r'^\s*async\s+def\s+',      # coroutine definitions
    r'^\s*await\s+',            # await expressions
    r'^\s*\(\s*\)\s*:',         # empty parenthesised group followed by a colon
]

# (pattern, weight) pairs; patterns are written for lower-cased text.
HTML_PATTERNS = [
    (r'<!doctype\s+html>', 0.3), (r'<html[\s>]', 0.2), (r'<head[\s>]', 0.2),
    (r'<body[\s>]', 0.2), (r'<div[\s>]', 0.15), (r'<p[\s>]', 0.15),
    (r'<a\s+href=', 0.2), (r'<img\s+src=', 0.2), (r'<script[\s>]', 0.2),
    (r'<style[\s>]', 0.2), (r'</(html|head|body|div|p|a|img|script|style)>', 0.3),
]

# (pattern, weight) pairs for XML markers.
XML_PATTERNS = [
    (r'<\?xml\s+version=', 1.0),                # XML declaration
    (r'^<[^!?][^>]*>.*</[^>]+>$', 0.9),         # starts with a tag, ends with a closing tag
    (r'<[^/][^>]*/>', 0.7),                     # self-closing element
    (r'<(/?)(\w+)[^>]*>', 0.5),                 # any opening or closing tag
]

# (pattern, weight) pairs for YAML markers.
YAML_PATTERNS = [
    (r'^\s*[\w-]+\s*:', 0.5),               # key:
    (r'^\s*-\s*\w+', 0.4),                  # list item
    (r'^---$', 0.3),                        # document separator
    (r'^\s*[\w-]+\s*:\s*\|', 0.4),          # literal block scalar
    (r'^\s*[\w-]+\s*:\s*>\s*$', 0.4),       # folded block scalar
    (r'^\s*#', 0.1),                        # comment
    # Contains a newline, so it can only match when searched against
    # multi-line text rather than a single line.
    (r'^\s*\w+:\s*\n\s+', 0.3),
]
|
|
205
|
+
|
|
206
|
+
|
|
207
|
+
# Detector implementations with improved accuracy
|
|
208
|
+
|
|
209
|
+
def _is_valid_json(content: str) -> float:
|
|
210
|
+
"""Enhanced JSON detector with better pattern matching."""
|
|
211
|
+
content = content.strip()
|
|
212
|
+
if not content:
|
|
213
|
+
return 0.0
|
|
214
|
+
|
|
215
|
+
# Quick check for JSON structure
|
|
216
|
+
if not (content.startswith(('{', '[')) and content.endswith(('}', ']'))):
|
|
217
|
+
return 0.0
|
|
218
|
+
|
|
219
|
+
# Check for common JSON patterns
|
|
220
|
+
json_patterns = [
|
|
221
|
+
r'^\s*\{\s*".*"\s*:\s*[^,]+(,\s*".*"\s*:\s*[^,]+)*\s*\}\s*$',
|
|
222
|
+
r'^\s*$$\s*[^,]*(,\s*[^,]+)*\s*$$\s*$',
|
|
223
|
+
r'"\w+"\s*:\s*(null|true|false|"[^"]*"|\d+\.?\d*)'
|
|
224
|
+
]
|
|
225
|
+
|
|
226
|
+
if not any(re.search(p, content, re.DOTALL) for p in json_patterns):
|
|
227
|
+
return 0.0
|
|
228
|
+
|
|
229
|
+
try:
|
|
230
|
+
json.loads(content)
|
|
231
|
+
return 1.0
|
|
232
|
+
except (ValueError, TypeError, json.JSONDecodeError):
|
|
233
|
+
return 0.0
|
|
234
|
+
|
|
235
|
+
|
|
236
|
+
def _looks_like_python(content: str) -> float:
|
|
237
|
+
"""Enhanced Python detector with better syntax analysis."""
|
|
238
|
+
lines = [line for line in content.splitlines() if line.strip()]
|
|
239
|
+
if not lines:
|
|
240
|
+
return 0.0
|
|
241
|
+
|
|
242
|
+
# Check for shebang
|
|
243
|
+
if lines[0].startswith('#!') and 'python' in lines[0].lower():
|
|
244
|
+
return 1.0
|
|
245
|
+
|
|
246
|
+
# Count Python-specific constructs
|
|
247
|
+
keyword_count = sum(
|
|
248
|
+
1 for line in lines
|
|
249
|
+
if any(kw in line for kw in PYTHON_KEYWORDS)
|
|
250
|
+
)
|
|
251
|
+
construct_count = sum(
|
|
252
|
+
1 for line in lines
|
|
253
|
+
if any(re.search(p, line) for p in PYTHON_CONSTRUCTS)
|
|
254
|
+
)
|
|
255
|
+
|
|
256
|
+
# Check indentation consistency
|
|
257
|
+
indent_consistent = True
|
|
258
|
+
indent_levels = set()
|
|
259
|
+
for line in lines:
|
|
260
|
+
if line.strip().startswith('#'):
|
|
261
|
+
continue
|
|
262
|
+
indent = len(line) - len(line.lstrip())
|
|
263
|
+
indent_levels.add(indent)
|
|
264
|
+
if len(indent_levels) > 3: # More than 3 indent levels is suspicious
|
|
265
|
+
indent_consistent = False
|
|
266
|
+
break
|
|
267
|
+
|
|
268
|
+
# Calculate confidence
|
|
269
|
+
score = 0.0
|
|
270
|
+
if keyword_count > 0:
|
|
271
|
+
score += min(0.5, keyword_count * 0.15)
|
|
272
|
+
if construct_count > 0:
|
|
273
|
+
score += min(0.4, construct_count * 0.12)
|
|
274
|
+
if indent_consistent and len(indent_levels) > 1:
|
|
275
|
+
score += 0.2
|
|
276
|
+
|
|
277
|
+
return min(1.0, score)
|
|
278
|
+
|
|
279
|
+
|
|
280
|
+
def _looks_like_html(content: str) -> float:
|
|
281
|
+
"""Enhanced HTML detector with better tag analysis."""
|
|
282
|
+
content = content.lower()
|
|
283
|
+
|
|
284
|
+
# Check for HTML5 doctype
|
|
285
|
+
if re.search(r'<!doctype\s+html>', content):
|
|
286
|
+
return 1.0
|
|
287
|
+
|
|
288
|
+
# Calculate score based on patterns
|
|
289
|
+
score = sum(
|
|
290
|
+
weight for pattern, weight in HTML_PATTERNS
|
|
291
|
+
if re.search(pattern, content)
|
|
292
|
+
)
|
|
293
|
+
|
|
294
|
+
# Check tag balance
|
|
295
|
+
open_tags = re.findall(r'<([a-z]+)[^>]*>', content)
|
|
296
|
+
close_tags = re.findall(r'</([a-z]+)>', content)
|
|
297
|
+
|
|
298
|
+
if open_tags and close_tags:
|
|
299
|
+
tag_balance = min(len(open_tags), len(close_tags)) / max(len(open_tags), len(close_tags), 1)
|
|
300
|
+
score += min(0.3, tag_balance * 0.5)
|
|
301
|
+
|
|
302
|
+
return min(1.0, score)
|
|
303
|
+
|
|
304
|
+
|
|
305
|
+
def _looks_like_xml(content: str) -> float:
|
|
306
|
+
"""Enhanced XML detector with better structure analysis."""
|
|
307
|
+
content = content.strip()
|
|
308
|
+
|
|
309
|
+
if '<?xml ' in content[:100].lower():
|
|
310
|
+
return 1.0
|
|
311
|
+
|
|
312
|
+
# Check for root element
|
|
313
|
+
root_elements = re.findall(r'<([^!?][^>]*)>', content[:500])
|
|
314
|
+
if not root_elements:
|
|
315
|
+
return 0.0
|
|
316
|
+
|
|
317
|
+
# Calculate score
|
|
318
|
+
score = sum(
|
|
319
|
+
weight for pattern, weight in XML_PATTERNS
|
|
320
|
+
if re.search(pattern, content)
|
|
321
|
+
)
|
|
322
|
+
|
|
323
|
+
# Check for balanced tags
|
|
324
|
+
tags = re.findall(r'<(/?)(\w+)[^>]*>', content)
|
|
325
|
+
if tags:
|
|
326
|
+
stack = []
|
|
327
|
+
balanced = True
|
|
328
|
+
for is_close, tag in tags:
|
|
329
|
+
if not is_close:
|
|
330
|
+
stack.append(tag)
|
|
331
|
+
else:
|
|
332
|
+
if not stack or stack[-1] != tag:
|
|
333
|
+
balanced = False
|
|
334
|
+
break
|
|
335
|
+
stack.pop()
|
|
336
|
+
if balanced and not stack:
|
|
337
|
+
score += 0.3
|
|
338
|
+
|
|
339
|
+
return min(1.0, score)
|
|
340
|
+
|
|
341
|
+
|
|
342
|
+
def _looks_like_markdown(content: str) -> float:
|
|
343
|
+
"""Check if content looks like Markdown with confidence score."""
|
|
344
|
+
lines = [line for line in content.splitlines() if line.strip()]
|
|
345
|
+
if not lines:
|
|
346
|
+
return 0.0
|
|
347
|
+
|
|
348
|
+
md_patterns = [
|
|
349
|
+
(r'^#+\s', 0.3), # Headers
|
|
350
|
+
(r'^[-*]\s', 0.2), # Unordered lists
|
|
351
|
+
(r'^\d+\.\s', 0.2), # Ordered lists
|
|
352
|
+
(r'^>\s', 0.2), # Blockquotes
|
|
353
|
+
(r'$$.*?$$$.*?$', 0.3), # Links - 修正这里
|
|
354
|
+
(r'\*\*.*?\*\*|__.*?__', 0.2), # Bold
|
|
355
|
+
(r'\*.*?\*|_.*?_', 0.2), # Italic
|
|
356
|
+
(r'^```', 0.3), # Code blocks
|
|
357
|
+
(r'^\|.*\|.*\|$', 0.3), # Tables
|
|
358
|
+
(r'^---$|^===', 0.2), # Horizontal rules
|
|
359
|
+
(r'^`[^`]+`', 0.2), # Inline code
|
|
360
|
+
(r'!$$.*?$$$.*?$', 0.3), # Images - 修正这里
|
|
361
|
+
]
|
|
362
|
+
|
|
363
|
+
md_lines = 0
|
|
364
|
+
total_lines = len(lines)
|
|
365
|
+
|
|
366
|
+
for line in lines:
|
|
367
|
+
for pattern, weight in md_patterns:
|
|
368
|
+
if re.search(pattern, line):
|
|
369
|
+
md_lines += 1
|
|
370
|
+
break
|
|
371
|
+
|
|
372
|
+
# Calculate confidence based on percentage of markdown lines
|
|
373
|
+
ratio = md_lines / total_lines
|
|
374
|
+
if ratio >= 0.3: # At least 30% of lines are markdown
|
|
375
|
+
return min(1.0, ratio * 1.5) # Scale to 0-1 range
|
|
376
|
+
|
|
377
|
+
return 0.0
|
|
378
|
+
|
|
379
|
+
|
|
380
|
+
def _looks_like_yaml(content: str) -> float:
|
|
381
|
+
"""Enhanced YAML detector with better structure analysis."""
|
|
382
|
+
lines = [line for line in content.splitlines() if line.strip()]
|
|
383
|
+
if not lines:
|
|
384
|
+
return 0.0
|
|
385
|
+
|
|
386
|
+
# Calculate score based on patterns
|
|
387
|
+
score = sum(
|
|
388
|
+
weight for pattern, weight in YAML_PATTERNS
|
|
389
|
+
if any(re.search(pattern, line) for line in lines)
|
|
390
|
+
)
|
|
391
|
+
|
|
392
|
+
# Check for common YAML structures
|
|
393
|
+
has_documents = any(re.search(r'^---\s*$', line) for line in lines)
|
|
394
|
+
has_nested = any(re.search(r'^\s*\w+:\s*\n\s+\w', line) for line in lines)
|
|
395
|
+
has_lists = any(re.search(r'^\s*-\s+', line) for line in lines)
|
|
396
|
+
|
|
397
|
+
if has_documents or has_nested or has_lists:
|
|
398
|
+
score = min(1.0, score * 1.2)
|
|
399
|
+
|
|
400
|
+
return min(1.0, score)
|
|
401
|
+
|
|
402
|
+
# Configured detectors with optimized thresholds
|
|
403
|
+
# Registry of detectors. JSON is the only exclusive entry: a score at or
# above its threshold ends detection immediately.
CONTENT_TYPE_DETECTORS: List[ContentTypeDetector] = [
    ContentTypeDetector(
        name="json",
        detector=_is_valid_json,
        priority=100,
        confidence_threshold=0.95,
        exclusive=True,
    ),
    ContentTypeDetector(
        name="yaml",
        detector=_looks_like_yaml,
        priority=90,
        confidence_threshold=0.8,
    ),
    ContentTypeDetector(
        name="python",
        detector=_looks_like_python,
        priority=80,
        confidence_threshold=0.75,
    ),
    ContentTypeDetector(
        name="html",
        detector=_looks_like_html,
        priority=70,
        confidence_threshold=0.7,
    ),
    ContentTypeDetector(
        name="xml",
        detector=_looks_like_xml,
        priority=60,
        confidence_threshold=0.8,
    ),
    ContentTypeDetector(
        name="markdown",
        detector=_looks_like_markdown,
        priority=40,
        confidence_threshold=0.65,
    ),
]
|