auto-analyser 0.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- auto_analyser-0.2.0/.coverage +0 -0
- auto_analyser-0.2.0/LICENSE +21 -0
- auto_analyser-0.2.0/PKG-INFO +13 -0
- auto_analyser-0.2.0/README.md +83 -0
- auto_analyser-0.2.0/poly-lens.example.yaml +36 -0
- auto_analyser-0.2.0/pyproject.toml +31 -0
- auto_analyser-0.2.0/src/auto_analyser/__init__.py +3 -0
- auto_analyser-0.2.0/src/auto_analyser/cli.py +125 -0
- auto_analyser-0.2.0/src/auto_analyser/config.py +67 -0
- auto_analyser-0.2.0/src/auto_analyser/detector.py +95 -0
- auto_analyser-0.2.0/src/auto_analyser/router.py +132 -0
- auto_analyser-0.2.0/src/multi_analyser/__init__.py +3 -0
- auto_analyser-0.2.0/src/multi_analyser/cli.py +128 -0
- auto_analyser-0.2.0/src/multi_analyser/config.py +64 -0
- auto_analyser-0.2.0/src/multi_analyser/detector.py +100 -0
- auto_analyser-0.2.0/src/multi_analyser/router.py +132 -0
- auto_analyser-0.2.0/tests/conftest.py +19 -0
- auto_analyser-0.2.0/tests/test_detector.py +58 -0
- auto_analyser-0.2.0/tests/test_router.py +79 -0
- auto_analyser-0.2.0/uv.lock +442 -0
|
Binary file
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Michael Borck
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: auto-analyser
|
|
3
|
+
Version: 0.2.0
|
|
4
|
+
Summary: Auto-analyser — detect a file's format and route it to the right analyser family member
|
|
5
|
+
License-File: LICENSE
|
|
6
|
+
Requires-Python: >=3.11
|
|
7
|
+
Requires-Dist: httpx>=0.27.0
|
|
8
|
+
Requires-Dist: pyyaml>=6.0.0
|
|
9
|
+
Requires-Dist: rich>=13.0.0
|
|
10
|
+
Provides-Extra: dev
|
|
11
|
+
Requires-Dist: pytest-cov>=4.0.0; extra == 'dev'
|
|
12
|
+
Requires-Dist: pytest>=8.0.0; extra == 'dev'
|
|
13
|
+
Requires-Dist: respx>=0.21.0; extra == 'dev'
|
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
# multi-analyser
|
|
2
|
+
|
|
3
|
+
Routes any file to the right analyser. Detects the file format, calls the appropriate tool, and returns the result — so you don't need to know which analyser handles which format.
|
|
4
|
+
|
|
5
|
+
Part of the [analyser family](#the-analyser-family).
|
|
6
|
+
|
|
7
|
+
## Install
|
|
8
|
+
|
|
9
|
+
```bash
|
|
10
|
+
pip install multi-analyser
|
|
11
|
+
```
|
|
12
|
+
|
|
13
|
+
Requires Python 3.11+. The analysers it calls must be installed and reachable separately.
|
|
14
|
+
|
|
15
|
+
## Usage
|
|
16
|
+
|
|
17
|
+
### CLI
|
|
18
|
+
|
|
19
|
+
```bash
|
|
20
|
+
# Detect which analyser would handle a file
|
|
21
|
+
multi-analyser detect report.pdf # report.pdf -> document-analyser
|
|
22
|
+
multi-analyser detect interview.mp3 # interview.mp3 -> speech-analyser
|
|
23
|
+
multi-analyser detect data.xlsx # data.xlsx -> records-analyser
|
|
24
|
+
|
|
25
|
+
# Analyse a file — auto-detects format and routes
|
|
26
|
+
multi-analyser analyse report.pdf
|
|
27
|
+
multi-analyser analyse recording.mp3 --json
|
|
28
|
+
|
|
29
|
+
# Force a specific analyser
|
|
30
|
+
multi-analyser analyse interview.mp4 --analyser speech-analyser
|
|
31
|
+
|
|
32
|
+
# Check which analysers are reachable
|
|
33
|
+
multi-analyser status
|
|
34
|
+
```
|
|
35
|
+
|
|
36
|
+
### Python
|
|
37
|
+
|
|
38
|
+
```python
|
|
39
|
+
from auto_analyser.router import Router
|
|
40
|
+
|
|
41
|
+
router = Router()
|
|
42
|
+
result = router.route("report.pdf")
|
|
43
|
+
print(result["routed_to"]) # "document-analyser"
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
## Configuration
|
|
47
|
+
|
|
48
|
+
multi-analyser ships with built-in defaults (document-analyser on `localhost:8000`, speech-analyser via CLI, etc.). Override with a YAML config file at `./multi-analyser.yaml` or `~/.config/multi-analyser/config.yaml`:
|
|
49
|
+
|
|
50
|
+
```yaml
|
|
51
|
+
analysers:
|
|
52
|
+
document-analyser:
|
|
53
|
+
type: http
|
|
54
|
+
url: http://localhost:8000
|
|
55
|
+
extensions: [.pdf, .docx, .pptx, .txt, .md]
|
|
56
|
+
|
|
57
|
+
speech-analyser:
|
|
58
|
+
type: cli
|
|
59
|
+
command: speech-analyser
|
|
60
|
+
extensions: [.mp3, .wav, .m4a, .ogg, .flac, .mp4, .mov]
|
|
61
|
+
|
|
62
|
+
records-analyser:
|
|
63
|
+
type: http
|
|
64
|
+
url: http://localhost:8003
|
|
65
|
+
extensions: [.csv, .tsv, .xlsx, .parquet, .db, .sqlite]
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
## The analyser family
|
|
69
|
+
|
|
70
|
+
Low-level analysis tools. Each accepts files directly and returns structured JSON. Build your own UI or pipeline on top.
|
|
71
|
+
|
|
72
|
+
| Package | Handles |
|
|
73
|
+
|---|---|
|
|
74
|
+
| [speech-analyser](https://github.com/michael-borck/speech-analyser) | audio and video files — transcript and speech metrics |
|
|
75
|
+
| [video-analyser](https://github.com/michael-borck/video-analyser) | video files — frames, scenes, and visual quality |
|
|
76
|
+
| [document-analyser](https://github.com/michael-borck/document-analyser) | PDF, DOCX, PPTX, TXT — text and readability |
|
|
77
|
+
| [code-analyser](https://github.com/michael-borck/code-analyser) | source code — style, complexity, and quality metrics |
|
|
78
|
+
| [records-analyser](https://github.com/michael-borck/records-analyser) | CSV, Excel, SQLite, Parquet, JSON — data profiling |
|
|
79
|
+
| [multi-analyser](https://github.com/michael-borck/multi-analyser) | any file — detects format and routes to the right tool |
|
|
80
|
+
|
|
81
|
+
## License
|
|
82
|
+
|
|
83
|
+
MIT
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
# poly-lens configuration
|
|
2
|
+
# Copy to ~/.config/poly-lens/config.yaml or ./poly-lens.yaml and adjust.
|
|
3
|
+
#
|
|
4
|
+
# Rate limiting: handled by reverse proxy (nginx, Caddy) for public deployments.
|
|
5
|
+
# Each lens can be run as CLI (default, no server needed) or HTTP (start server first).
|
|
6
|
+
|
|
7
|
+
lenses:
|
|
8
|
+
document-lens:
|
|
9
|
+
type: http
|
|
10
|
+
url: http://localhost:8000
|
|
11
|
+
|
|
12
|
+
# audio-lens: CLI by default (audiolens must be installed).
|
|
13
|
+
# Switch to http if running: audiolens serve --port 8001
|
|
14
|
+
audio-lens:
|
|
15
|
+
type: cli
|
|
16
|
+
command: audiolens
|
|
17
|
+
# audio-lens:
|
|
18
|
+
# type: http
|
|
19
|
+
# url: http://localhost:8001
|
|
20
|
+
|
|
21
|
+
# data-lens: CLI by default (datalens must be installed).
|
|
22
|
+
# Switch to http if running: datalens serve --port 8002
|
|
23
|
+
data-lens:
|
|
24
|
+
type: cli
|
|
25
|
+
command: datalens
|
|
26
|
+
# data-lens:
|
|
27
|
+
# type: http
|
|
28
|
+
# url: http://localhost:8002
|
|
29
|
+
|
|
30
|
+
code-lens:
|
|
31
|
+
type: http
|
|
32
|
+
url: http://localhost:8003
|
|
33
|
+
|
|
34
|
+
video-lens:
|
|
35
|
+
type: cli
|
|
36
|
+
command: videolens
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["hatchling"]
|
|
3
|
+
build-backend = "hatchling.build"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "auto-analyser"
|
|
7
|
+
version = "0.2.0"
|
|
8
|
+
description = "Auto-analyser — detect a file's format and route it to the right analyser family member"
|
|
9
|
+
requires-python = ">=3.11"
|
|
10
|
+
dependencies = [
|
|
11
|
+
"httpx>=0.27.0",
|
|
12
|
+
"PyYAML>=6.0.0",
|
|
13
|
+
"rich>=13.0.0",
|
|
14
|
+
]
|
|
15
|
+
|
|
16
|
+
[project.optional-dependencies]
|
|
17
|
+
dev = [
|
|
18
|
+
"pytest>=8.0.0",
|
|
19
|
+
"pytest-cov>=4.0.0",
|
|
20
|
+
"respx>=0.21.0",
|
|
21
|
+
]
|
|
22
|
+
|
|
23
|
+
[project.scripts]
|
|
24
|
+
auto-analyser = "auto_analyser.cli:main"
|
|
25
|
+
|
|
26
|
+
[tool.hatch.build.targets.wheel]
|
|
27
|
+
packages = ["src/auto_analyser"]
|
|
28
|
+
|
|
29
|
+
[tool.pytest.ini_options]
|
|
30
|
+
testpaths = ["tests"]
|
|
31
|
+
pythonpath = ["src"]
|
|
@@ -0,0 +1,125 @@
|
|
|
1
|
+
"""auto-analyser CLI — file analysis router.
|
|
2
|
+
|
|
3
|
+
Usage:
|
|
4
|
+
auto-analyser analyse report.pdf
|
|
5
|
+
auto-analyser analyse data.csv --analyser records-analyser
|
|
6
|
+
auto-analyser analyse recording.mp3 --json
|
|
7
|
+
auto-analyser detect notebook.ipynb
|
|
8
|
+
auto-analyser status
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
import json
|
|
12
|
+
import sys
|
|
13
|
+
from pathlib import Path
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def main() -> None:
    """Parse the command line and run the selected sub-command.

    Three sub-commands are defined: ``analyse`` (takes a file, an optional
    ``--analyser`` override and a ``--json`` flag), ``detect`` (takes a file)
    and ``status`` (no arguments). A sub-command is mandatory; argparse exits
    with a usage error when none is given.
    """
    import argparse

    cli = argparse.ArgumentParser(
        prog="auto-analyser",
        description="Route files to the right analyser",
    )
    commands = cli.add_subparsers(dest="command", required=True)

    analyse_parser = commands.add_parser("analyse", help="Analyse a file")
    analyse_parser.add_argument("file", type=Path, help="File to analyse")
    analyse_parser.add_argument(
        "--analyser",
        help="Force a specific analyser (e.g. code-analyser)",
    )
    analyse_parser.add_argument(
        "--json",
        action="store_true",
        dest="as_json",
        help="Output raw JSON",
    )

    detect_parser = commands.add_parser(
        "detect",
        help="Show which analyser would handle a file",
    )
    detect_parser.add_argument("file", type=Path)

    commands.add_parser(
        "status",
        help="Show configured analysers and whether they are reachable",
    )

    args = cli.parse_args()

    # Lambdas defer the handler lookup until the chosen command is known.
    handlers = {
        "analyse": lambda: _cmd_analyse(args),
        "detect": lambda: _cmd_detect(args),
        "status": lambda: _cmd_status(),
    }
    handler = handlers.get(args.command)
    if handler is not None:
        handler()
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def _cmd_analyse(args) -> None:
    """Run the ``analyse`` sub-command: route a file and print the result.

    Args:
        args: Parsed arguments providing ``file``, ``analyser`` and ``as_json``.

    On failure the error is written to stderr (as a JSON object when
    ``--json`` was given) and the process exits with status 1. On success the
    result is printed either as raw JSON or as a short human-readable summary.
    """
    from .router import Router, RoutingError

    try:
        # Router() loads the configuration, which raises ValueError for a
        # malformed analyser entry; report that like any other routing
        # failure instead of letting it escape as a traceback.
        router = Router()
        result = router.route(args.file, analyser_name=args.analyser)
    except (RoutingError, ValueError) as e:
        if args.as_json:
            print(json.dumps({"error": str(e)}, indent=2, default=str), file=sys.stderr)
        else:
            print(f"Error: {e}", file=sys.stderr)
        sys.exit(1)

    if args.as_json:
        print(json.dumps(result, indent=2, default=str))
        return

    if result.get("warning"):
        print(f"Note: {result['warning']}\n")

    print(f"Routed to: {result.get('routed_to', 'unknown')}")
    print()
    print("Full result (use --json for machine-readable output):")
    # The routing metadata was already shown above; summarise the rest.
    _print_summary({k: v for k, v in result.items() if k not in ("routed_to", "warning")})
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def _cmd_detect(args) -> None:
    """Run the ``detect`` sub-command: report which analyser matches a file.

    Args:
        args: Parsed arguments providing ``file`` (a ``Path``).

    Prints any detection warning first, then ``<name> -> <analyser>``. When no
    analyser matches, prints an "unknown" line and exits with status 1.
    """
    from .detector import detect

    detection = detect(args.file)
    filename = args.file.name

    if detection.warning:
        print(f"Note: {detection.warning}")

    if not detection.analyser:
        print(f"{filename} -> unknown (no analyser configured for {detection.extension})")
        sys.exit(1)

    print(f"{filename} -> {detection.analyser}")
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
def _cmd_status() -> None:
    """Run the ``status`` sub-command: list analysers and probe each one.

    HTTP analysers are probed with a GET on ``<url>/health`` (3 s timeout);
    any failure is reported as "not reachable". Every other analyser is
    treated as a CLI tool and probed by running ``<command> --version``
    (5 s timeout). Entries with no url/command configured are reported as
    such instead of being probed.
    """
    import subprocess

    from .config import load_config
    import httpx

    config = load_config()
    print("Configured analysers:\n")

    for name, cfg in config.analysers.items():
        if cfg.type == "http":
            if not cfg.url:
                # Nothing to probe; avoid requesting the literal "None/health".
                status = "no url configured"
            else:
                try:
                    # Tolerate a trailing slash in the configured base URL.
                    httpx.get(f"{cfg.url.rstrip('/')}/health", timeout=3).raise_for_status()
                    status = "reachable"
                except Exception:
                    # Best-effort probe: any failure just means "not reachable".
                    status = "not reachable"
            print(f" {name:<22} http {cfg.url} {status}")
        else:
            if not cfg.command:
                # subprocess.run([None, ...]) would raise TypeError.
                status = "no command configured"
            else:
                try:
                    subprocess.run([cfg.command, "--version"], capture_output=True, timeout=5)
                    status = "installed"
                except FileNotFoundError:
                    status = "not found"
                except subprocess.TimeoutExpired:
                    status = "installed (timeout on --version)"
                except OSError as exc:
                    # e.g. the file exists but is not executable.
                    status = f"not runnable ({exc})"
            print(f" {name:<22} cli {cfg.command} {status}")
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
def _print_summary(data: dict) -> None:
|
|
113
|
+
for key, value in data.items():
|
|
114
|
+
if isinstance(value, dict):
|
|
115
|
+
print(f" {key}:")
|
|
116
|
+
for k, v in value.items():
|
|
117
|
+
print(f" {k}: {v}")
|
|
118
|
+
elif isinstance(value, list):
|
|
119
|
+
print(f" {key}: [{len(value)} items]")
|
|
120
|
+
else:
|
|
121
|
+
print(f" {key}: {value}")
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
if __name__ == "__main__":
|
|
125
|
+
main()
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
from dataclasses import dataclass, field
|
|
2
|
+
from pathlib import Path
|
|
3
|
+
from typing import Literal
|
|
4
|
+
|
|
5
|
+
import yaml
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
@dataclass
|
|
9
|
+
class AnalyserConfig:
|
|
10
|
+
type: Literal["http", "cli"]
|
|
11
|
+
url: str | None = None # for type=http
|
|
12
|
+
command: str | None = None # for type=cli
|
|
13
|
+
formats: list[str] = field(default_factory=list)
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
@dataclass
class FamilyConfig:
    """The set of configured analysers, keyed by analyser name."""

    analysers: dict[str, AnalyserConfig]

    def get(self, analyser_name: str) -> AnalyserConfig | None:
        """Return the settings for ``analyser_name``, or ``None`` if absent."""
        settings = self.analysers.get(analyser_name)
        return settings

    def available(self) -> list[str]:
        """Return the names of all configured analysers."""
        return [*self.analysers.keys()]
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
# Localhost port of each built-in analyser; all of them default to HTTP.
_DEFAULT_PORTS: dict[str, int] = {
    "document-analyser": 8000,
    "speech-analyser": 8001,
    "video-analyser": 8002,
    "records-analyser": 8003,
    "code-analyser": 8004,
    "wordpress-analyser": 8005,
}

# Built-in analyser settings, used when the config file does not override them.
_DEFAULTS: dict[str, dict] = {
    analyser: {"type": "http", "url": f"http://localhost:{port}"}
    for analyser, port in _DEFAULT_PORTS.items()
}

# Config file locations, in lookup order: working directory, then user config.
_USER_CONFIG_DIR = Path.home() / ".config" / "auto-analyser"
_CONFIG_PATHS = [
    Path("auto-analyser.yaml"),
    _USER_CONFIG_DIR / "config.yaml",
]
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def load_config() -> FamilyConfig:
    """Load family config. Falls back to built-in defaults if no file found.

    The first existing path in ``_CONFIG_PATHS`` is read; later ones are
    ignored. Entries under the file's ``analysers`` key are merged over the
    built-in defaults by analyser name, and an entry in the file replaces the
    default entry of the same name entirely.

    Returns:
        The merged configuration.

    Raises:
        ValueError: if the file or its ``analysers`` section is not a
            mapping, or an analyser entry lacks the ``type`` field.
    """
    raw: dict = {}

    for path in _CONFIG_PATHS:
        if path.exists():
            # Explicit encoding: do not depend on the platform's locale default.
            with open(path, encoding="utf-8") as f:
                raw = yaml.safe_load(f) or {}
            if not isinstance(raw, dict):
                raise ValueError(
                    f"Config file '{path}' must contain a YAML mapping at the top level."
                )
            break

    # "analysers:" with no value parses as None; treat it as an empty section.
    overrides = raw.get("analysers") or {}
    if not isinstance(overrides, dict):
        raise ValueError(
            "Config section 'analysers' must be a mapping of analyser name to settings."
        )

    merged = {**_DEFAULTS, **overrides}
    analysers = {}
    for name, cfg in merged.items():
        # An empty entry ("name:" with no body) parses as None, not a mapping.
        if not isinstance(cfg, dict) or "type" not in cfg:
            raise ValueError(
                f"Analyser '{name}' in config is missing required field 'type'. "
                f"Must be 'http' or 'cli'."
            )
        analysers[name] = AnalyserConfig(
            type=cfg["type"],
            url=cfg.get("url"),
            command=cfg.get("command"),
            # "formats:" with no value parses as None; keep the field a list.
            formats=cfg.get("formats") or [],
        )

    return FamilyConfig(analysers=analysers)
|
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
from dataclasses import dataclass
|
|
2
|
+
from pathlib import Path
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
@dataclass
|
|
6
|
+
class DetectionResult:
|
|
7
|
+
analyser: str | None # None if unknown
|
|
8
|
+
extension: str
|
|
9
|
+
warning: str | None = None
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
# Extension → analyser name
|
|
13
|
+
# Extensions grouped by the analyser that handles them.
_EXTENSIONS_BY_ANALYSER: dict[str, tuple[str, ...]] = {
    "document-analyser": (".pdf", ".docx", ".pptx", ".txt", ".md", ".qmd", ".rst"),
    "speech-analyser": (".mp3", ".wav", ".m4a", ".ogg", ".flac", ".aac", ".opus"),
    "records-analyser": (
        ".csv",
        ".tsv",
        ".xlsx",
        ".xls",
        ".parquet",
        ".sqlite",
        ".db",
        ".sqlite3",
        # ambiguous formats
        ".json",
        ".yaml",
        ".yml",
        ".xml",
    ),
    "code-analyser": (
        ".py",
        ".js",
        ".ts",
        ".tsx",
        ".jsx",
        ".html",
        ".css",
        ".scss",
        ".sql",
        ".ipynb",
    ),
    "video-analyser": (".mp4", ".mov", ".avi", ".webm", ".mkv"),
    "wordpress-analyser": (".php",),
}

# Extension → analyser name
_ROUTES: dict[str, str] = {
    extension: analyser
    for analyser, extensions in _EXTENSIONS_BY_ANALYSER.items()
    for extension in extensions
}

# Template for extensions that may hold either configuration or data.
_AMBIGUOUS_WARNING = " ".join(
    (
        "{ext} files may be configuration data or structured datasets.",
        "auto-analyser is routing to records-analyser.",
        "For prose content, use document-analyser directly.",
    )
)

# Template for notebook formats that mix code and prose.
_NOTEBOOK_WARNING = " ".join(
    (
        "{ext} is a notebook format containing both code and prose.",
        "code-analyser will analyse the code cells.",
        "Pass extracted prose to document-analyser for writing quality analysis.",
    )
)
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
def detect(file_path: Path) -> DetectionResult:
    """Detect which analyser should handle this file.

    The decision is based solely on the lower-cased file extension; the file
    itself is never opened.

    Args:
        file_path: Path of the file to classify.

    Returns:
        A ``DetectionResult`` whose ``analyser`` is ``None`` when the
        extension has no route. ``warning`` is set for unknown extensions,
        for ambiguous data/config formats and for notebook formats.
    """
    ext = file_path.suffix.lower()
    analyser = _ROUTES.get(ext)

    warning = None
    if analyser is None:
        # Checked first so that an unrouted extension (e.g. .rmd) is never
        # described as being handled by a particular analyser.
        warning = (
            f"Unknown format: {ext}. "
            f"auto-analyser does not know which analyser handles this file. "
            f"Use an analyser directly or add a mapping to your auto-analyser.yaml."
        )
    elif ext in {".json", ".yaml", ".yml", ".xml"}:
        warning = _AMBIGUOUS_WARNING.format(ext=ext.upper())
    elif ext in {".ipynb", ".qmd", ".rmd"}:
        if analyser == "code-analyser":
            warning = _NOTEBOOK_WARNING.format(ext=ext)
        else:
            # The stock notebook warning names code-analyser; keep the
            # message truthful when the route points somewhere else.
            warning = (
                f"{ext} is a notebook format containing both code and prose. "
                f"auto-analyser is routing to {analyser}. "
                f"For the code cells, use code-analyser directly."
            )

    return DetectionResult(analyser=analyser, extension=ext, warning=warning)
|
|
@@ -0,0 +1,132 @@
|
|
|
1
|
+
import json
|
|
2
|
+
import subprocess
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
from typing import Any
|
|
5
|
+
|
|
6
|
+
import httpx
|
|
7
|
+
|
|
8
|
+
from .config import FamilyConfig, load_config
|
|
9
|
+
from .detector import detect
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class RoutingError(Exception):
    """Signals that auto-analyser could not route or analyse a file."""

    pass
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class Router:
    """Routes a file to the appropriate analyser and returns the analysis."""

    def __init__(self, config: FamilyConfig | None = None) -> None:
        """Create a router.

        Args:
            config: Analyser configuration to route with. When omitted, the
                configuration is loaded with ``load_config()``.
        """
        self._config = config if config is not None else load_config()

    def route(
        self,
        file_path: "Path | str",
        analyser_name: str | None = None,
    ) -> dict[str, Any]:
        """Analyse a file by routing to the appropriate analyser.

        Args:
            file_path: File to analyse.
            analyser_name: Analyser to use. When omitted, the analyser is
                chosen from the file extension via ``detect``.

        Returns:
            The analysis dict with a 'routed_to' key injected, plus a
            'warning' key when detection produced one.

        Raises:
            RoutingError: if the file is missing, format unknown, analyser not
                configured, or the analyser returns an error or a payload
                that is not a JSON object.
        """
        if isinstance(file_path, str):
            file_path = Path(file_path)

        if not file_path.exists():
            raise RoutingError(f"File not found: {file_path}")
        if not file_path.is_file():
            raise RoutingError(f"Not a file: {file_path}")

        warning = None

        if analyser_name is None:
            detection = detect(file_path)
            if detection.analyser is None:
                raise RoutingError(
                    f"Unknown format: {file_path.suffix}. "
                    f"Use --analyser to specify an analyser directly."
                )
            analyser_name = detection.analyser
            warning = detection.warning

        analyser_cfg = self._config.get(analyser_name)
        if analyser_cfg is None:
            raise RoutingError(
                f"Analyser '{analyser_name}' is not configured. "
                f"Available: {self._config.available()}"
            )

        if analyser_cfg.type == "cli":
            if not analyser_cfg.command:
                raise RoutingError(
                    f"Analyser '{analyser_name}' has type=cli but no command configured."
                )
            data = self._call_cli(analyser_cfg.command, file_path)
        elif analyser_cfg.type == "http":
            if not analyser_cfg.url:
                raise RoutingError(
                    f"Analyser '{analyser_name}' has type=http but no url configured."
                )
            data = self._call_http(analyser_cfg.url, file_path)
        else:
            raise RoutingError(f"Unknown analyser type: {analyser_cfg.type}")

        # The analyser may legally emit any JSON value; only an object can
        # carry the injected keys, so reject anything else explicitly rather
        # than failing with a TypeError on the assignment below.
        if not isinstance(data, dict):
            raise RoutingError(
                f"Analyser '{analyser_name}' returned {type(data).__name__} "
                f"instead of a JSON object."
            )

        data["routed_to"] = analyser_name
        if warning:
            data["warning"] = warning
        return data

    def _call_cli(self, command: str, file_path: Path) -> dict[str, Any]:
        """Run ``<command> <file> --json`` and parse its stdout as JSON.

        A non-zero exit status is reported using the ``error`` field of a
        JSON object on stderr when there is one, otherwise the raw stderr
        text, otherwise the exit code. The call is limited to 300 seconds.

        Raises:
            RoutingError: if the tool is missing, fails, times out, or
                prints output that is not valid JSON.
        """
        try:
            proc = subprocess.run(
                [command, str(file_path), "--json"],
                capture_output=True,
                text=True,
                timeout=300,
            )
            if proc.returncode != 0:
                try:
                    err = json.loads(proc.stderr)
                    msg = err.get("error", proc.stderr.strip())
                except (json.JSONDecodeError, AttributeError):
                    # stderr was not JSON, or was JSON but not an object.
                    msg = proc.stderr.strip() or f"{command} exited with code {proc.returncode}"
                raise RoutingError(msg)
            try:
                return json.loads(proc.stdout)
            except json.JSONDecodeError as e:
                raise RoutingError(
                    f"{command} returned invalid JSON: {e}. "
                    f"stdout={proc.stdout[:200]!r}"
                ) from e
        except FileNotFoundError as e:
            raise RoutingError(f"CLI tool '{command}' not found. Is it installed?") from e
        except RoutingError:
            raise
        except Exception as e:
            # Anything else (e.g. a timeout) is surfaced as a routing failure.
            raise RoutingError(str(e)) from e

    def _call_http(self, url: str, file_path: Path) -> dict[str, Any]:
        """POST file to {url}/analyse.

        The file is sent as the multipart form field ``file``. The request is
        limited to 300 seconds. A trailing slash on ``url`` is ignored.

        Raises:
            RoutingError: if the service cannot be reached, answers with a
                non-success status, or any other error occurs.
        """
        # Avoid "//analyse" when the configured base URL ends with a slash.
        endpoint = f"{url.rstrip('/')}/analyse"
        try:
            with open(file_path, "rb") as f:
                with httpx.Client(timeout=300) as client:
                    response = client.post(
                        endpoint,
                        files={"file": (file_path.name, f)},
                    )
                    if not response.is_success:
                        try:
                            detail = response.json().get("detail", response.text)
                        except Exception:
                            # Body was not a JSON object; fall back to raw text.
                            detail = response.text
                        raise RoutingError(f"HTTP {response.status_code}: {detail}")
                    return response.json()
        except httpx.ConnectError as e:
            raise RoutingError(f"Cannot connect to {url}. Is the service running?") from e
        except RoutingError:
            raise
        except Exception as e:
            raise RoutingError(str(e)) from e
|