conversation-analyser 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34) hide show
  1. conversation_analyser-0.1.0/PKG-INFO +139 -0
  2. conversation_analyser-0.1.0/README.md +116 -0
  3. conversation_analyser-0.1.0/pyproject.toml +40 -0
  4. conversation_analyser-0.1.0/src/conversation_analyser/__init__.py +16 -0
  5. conversation_analyser-0.1.0/src/conversation_analyser/analytics.py +127 -0
  6. conversation_analyser-0.1.0/src/conversation_analyser/api.py +62 -0
  7. conversation_analyser-0.1.0/src/conversation_analyser/cli.py +129 -0
  8. conversation_analyser-0.1.0/src/conversation_analyser/config.py +69 -0
  9. conversation_analyser-0.1.0/src/conversation_analyser/embeddings.py +41 -0
  10. conversation_analyser-0.1.0/src/conversation_analyser/llm.py +94 -0
  11. conversation_analyser-0.1.0/src/conversation_analyser/manifest.py +31 -0
  12. conversation_analyser-0.1.0/src/conversation_analyser/models.py +76 -0
  13. conversation_analyser-0.1.0/src/conversation_analyser/parsers/__init__.py +7 -0
  14. conversation_analyser-0.1.0/src/conversation_analyser/parsers/anythingllm.py +55 -0
  15. conversation_analyser-0.1.0/src/conversation_analyser/parsers/base.py +57 -0
  16. conversation_analyser-0.1.0/src/conversation_analyser/parsers/llm_segment.py +31 -0
  17. conversation_analyser-0.1.0/src/conversation_analyser/parsers/markers.py +61 -0
  18. conversation_analyser-0.1.0/src/conversation_analyser/parsers/registry.py +73 -0
  19. conversation_analyser-0.1.0/src/conversation_analyser/parsers/role_content.py +55 -0
  20. conversation_analyser-0.1.0/src/conversation_analyser/pipeline.py +228 -0
  21. conversation_analyser-0.1.0/src/conversation_analyser/scoring.py +47 -0
  22. conversation_analyser-0.1.0/src/conversation_analyser/taxonomy.py +208 -0
  23. conversation_analyser-0.1.0/tests/fixtures/anythingllm.json +4 -0
  24. conversation_analyser-0.1.0/tests/fixtures/role_content.json +9 -0
  25. conversation_analyser-0.1.0/tests/fixtures/transcript.txt +7 -0
  26. conversation_analyser-0.1.0/tests/test_analytics.py +40 -0
  27. conversation_analyser-0.1.0/tests/test_api.py +31 -0
  28. conversation_analyser-0.1.0/tests/test_cli_smoke.py +43 -0
  29. conversation_analyser-0.1.0/tests/test_manifest.py +31 -0
  30. conversation_analyser-0.1.0/tests/test_parsers.py +66 -0
  31. conversation_analyser-0.1.0/tests/test_pipeline.py +70 -0
  32. conversation_analyser-0.1.0/tests/test_scoring.py +33 -0
  33. conversation_analyser-0.1.0/tests/test_taxonomy.py +66 -0
  34. conversation_analyser-0.1.0/uv.lock +1932 -0
@@ -0,0 +1,139 @@
1
+ Metadata-Version: 2.4
2
+ Name: conversation-analyser
3
+ Version: 0.1.0
4
+ Summary: Critical-thinking and analytics for human-AI conversations — a member of the lens analyser family.
5
+ Requires-Python: >=3.11
6
+ Requires-Dist: fastapi>=0.109.0
7
+ Requires-Dist: pydantic>=2.0
8
+ Requires-Dist: pyspellchecker>=0.8
9
+ Requires-Dist: python-multipart>=0.0.9
10
+ Requires-Dist: rich>=13.7.0
11
+ Requires-Dist: textstat>=0.7
12
+ Requires-Dist: uvicorn[standard]>=0.27.0
13
+ Requires-Dist: vadersentiment>=3.3
14
+ Provides-Extra: dev
15
+ Requires-Dist: httpx>=0.27.0; extra == 'dev'
16
+ Requires-Dist: pytest>=8.0; extra == 'dev'
17
+ Provides-Extra: embeddings
18
+ Requires-Dist: numpy>=1.24; extra == 'embeddings'
19
+ Requires-Dist: sentence-transformers>=2.2; extra == 'embeddings'
20
+ Provides-Extra: llm
21
+ Requires-Dist: anthropic>=0.39; extra == 'llm'
22
+ Description-Content-Type: text/markdown
23
+
24
+ # conversation-analyser
25
+
26
+ Critical-thinking and analytics for human–AI conversations — a member of the
27
+ [`lens`](../) analyser family.
28
+
29
+ It scores a single conversation on two tiers:
30
+
31
+ 1. **Analytics** (always on, offline): turn/word counts, prompt/response lengths,
32
+ question ratio, pushback hits, readability, sentiment trajectory, prompt
33
+ self-similarity, and temporal metrics when timestamps are present.
34
+ 2. **Critical thinking** (opt-in, needs an LLM): classifies every human turn under
35
+ a 7-label prompt taxonomy, derives engagement ratios, an engagement **band**,
36
+ and a composite **0–100 critical-thinking score** with a component breakdown.
37
+
38
+ The taxonomy reuses the validated `NQ/FU/CH/EX/DG/AC/MT` scheme from the ISYS6020
39
+ marking pipeline (copied and forked). Design: `docs/superpowers/specs/2026-05-23-conversation-analyser-design.md`.
40
+
41
+ ## Install
42
+
43
+ ```bash
44
+ pip install -e . # core: analytics + CLI + HTTP API
45
+ pip install -e '.[embeddings]' # + prompt self-similarity (sentence-transformers)
46
+ pip install -e '.[llm]' # + taxonomy/CT tier (anthropic)
47
+ pip install -e '.[embeddings,llm,dev]' # everything
48
+ export ANTHROPIC_API_KEY=... # required for the critical-thinking tier
49
+ ```
50
+
51
+ ## CLI
52
+
53
+ Bare positional path to analyse (human summary by default, `--json` for machines);
54
+ `serve` subcommand for the HTTP API — same grammar as the rest of the family.
55
+
56
+ ```bash
57
+ conversation-analyser transcript.txt # human summary, analytics only
58
+ conversation-analyser chat.json --json # full JSON to stdout
59
+ conversation-analyser chat.json --llm # add the critical-thinking tier
60
+ conversation-analyser log.json --idle-gap 45 # split sub-sessions on 45-min gaps
61
+ conversation-analyser raw.txt --parse-mode llm-segment --llm
62
+ conversation-analyser serve --port 8009 # run the HTTP API
63
+ ```
64
+
65
+ The critical-thinking tier is **opt-in** (`--llm`) to avoid surprise API costs;
66
+ without it you get the analytics tier only.
67
+
68
+ ## HTTP API
69
+
70
+ ```bash
71
+ conversation-analyser serve --port 8009
72
+ curl -F file=@chat.json 'http://127.0.0.1:8009/analyse' # analytics only
73
+ curl -F file=@chat.json -F llm=true 'http://127.0.0.1:8009/analyse'
74
+ curl http://127.0.0.1:8009/health
75
+ ```
76
+
77
+ `GET /health`, `GET /manifest`, and `POST /analyse` (multipart file upload, optional `llm` form
78
+ field) — the same `/analyse` contract auto-analyser routes to.
79
+
80
+ ## Python API
81
+
82
+ ```python
83
+ from conversation_analyser import ConversationAnalyser
84
+
85
+ result = ConversationAnalyser().analyse("transcript.txt", llm=True)
86
+ print(result.model_dump_json(indent=2))
87
+ ```
88
+
89
+ ## Input formats
90
+
91
+ A pluggable adapter registry tries, in order: structured adapters → heuristic
92
+ speaker markers → optional LLM segmentation → unsegmented fallback.
93
+
94
+ - **role/content** message list (OpenAI/Anthropic): `[{"role": "user", "content": "..."}, ...]`
95
+ - **AnythingLLM** rows: `[{"prompt": "...", "response": "...", "createdAt": ...}, ...]`
96
+ - **flat text** with speaker markers: `User:` / `Assistant:` / `Me:` / `ChatGPT:` /
97
+ `You said:` / `ChatGPT said:` / `Prompt:` / `Response:`
98
+ - anything else → LLM-segment (needs `[llm]`), else a single-blob fallback
99
+
100
+ `.pdf`/`.docx` inputs are text-extracted first (needs `pdfplumber`/`markitdown`,
101
+ or pre-extract with `document-analyser`).
102
+
103
+ ## The taxonomy
104
+
105
+ | Code | Meaning |
106
+ |---|---|
107
+ | `NQ` | New Query — opens a new topic |
108
+ | `FU` | Follow-up — clarification/elaboration |
109
+ | `CH` | Challenge — pushes back, tests, asks why |
110
+ | `EX` | Extension — applies/compares/synthesises in a new direction |
111
+ | `DG` | Delegation — task hand-off, no engagement |
112
+ | `AC` | Acknowledgement — thanks/confirmation |
113
+ | `MT` | Meta — about the conversation itself |
114
+
115
+ `critical_thinking = (CH+EX)/turns`, `delegation = DG/turns`, `filler = (AC+MT)/turns`.
116
+ Bands: One-Shot · Delegator · Directed · Iterative · Critical.
117
+
118
+ ## Graceful degradation
119
+
120
+ | Missing | Effect |
121
+ |---|---|
122
+ | `ANTHROPIC_API_KEY` / `[llm]` | `taxonomy`/`critical_thinking` null; analytics still produced; note `llm_unavailable` |
123
+ | `[embeddings]` | `prompt_self_similarity` null; note `embeddings_unavailable` |
124
+ | timestamps | temporal metrics omitted; no sub-session split; note `no timestamps` |
125
+
126
+ ## Output
127
+
128
+ `ConversationAnalysis` → an `aggregate` (rolled up over all human turns, the
129
+ headline) plus one `SessionAnalysis` per idle-gap sub-session, each with
130
+ `analytics`, `taxonomy`, `critical_thinking`, and per-turn `turns` (label +
131
+ rationale + preview). See the design spec §8 for the full schema.
132
+
133
+ ## Testing
134
+
135
+ ```bash
136
+ pytest # fast, deterministic (LLM mocked, no network)
137
+ pytest -m slow # includes sentence-transformers model download
138
+ pytest -m integration # includes live LLM calls
139
+ ```
@@ -0,0 +1,116 @@
1
+ # conversation-analyser
2
+
3
+ Critical-thinking and analytics for human–AI conversations — a member of the
4
+ [`lens`](../) analyser family.
5
+
6
+ It scores a single conversation on two tiers:
7
+
8
+ 1. **Analytics** (always on, offline): turn/word counts, prompt/response lengths,
9
+ question ratio, pushback hits, readability, sentiment trajectory, prompt
10
+ self-similarity, and temporal metrics when timestamps are present.
11
+ 2. **Critical thinking** (opt-in, needs an LLM): classifies every human turn under
12
+ a 7-label prompt taxonomy, derives engagement ratios, an engagement **band**,
13
+ and a composite **0–100 critical-thinking score** with a component breakdown.
14
+
15
+ The taxonomy reuses the validated `NQ/FU/CH/EX/DG/AC/MT` scheme from the ISYS6020
16
+ marking pipeline (copied and forked). Design: `docs/superpowers/specs/2026-05-23-conversation-analyser-design.md`.
17
+
18
+ ## Install
19
+
20
+ ```bash
21
+ pip install -e . # core: analytics + CLI + HTTP API
22
+ pip install -e '.[embeddings]' # + prompt self-similarity (sentence-transformers)
23
+ pip install -e '.[llm]' # + taxonomy/CT tier (anthropic)
24
+ pip install -e '.[embeddings,llm,dev]' # everything
25
+ export ANTHROPIC_API_KEY=... # required for the critical-thinking tier
26
+ ```
27
+
28
+ ## CLI
29
+
30
+ Bare positional path to analyse (human summary by default, `--json` for machines);
31
+ `serve` subcommand for the HTTP API — same grammar as the rest of the family.
32
+
33
+ ```bash
34
+ conversation-analyser transcript.txt # human summary, analytics only
35
+ conversation-analyser chat.json --json # full JSON to stdout
36
+ conversation-analyser chat.json --llm # add the critical-thinking tier
37
+ conversation-analyser log.json --idle-gap 45 # split sub-sessions on 45-min gaps
38
+ conversation-analyser raw.txt --parse-mode llm-segment --llm
39
+ conversation-analyser serve --port 8009 # run the HTTP API
40
+ ```
41
+
42
+ The critical-thinking tier is **opt-in** (`--llm`) to avoid surprise API costs;
43
+ without it you get the analytics tier only.
44
+
45
+ ## HTTP API
46
+
47
+ ```bash
48
+ conversation-analyser serve --port 8009
49
+ curl -F file=@chat.json 'http://127.0.0.1:8009/analyse' # analytics only
50
+ curl -F file=@chat.json -F llm=true 'http://127.0.0.1:8009/analyse'
51
+ curl http://127.0.0.1:8009/health
52
+ ```
53
+
54
+ `GET /health`, `GET /manifest`, and `POST /analyse` (multipart file upload, optional `llm` form
55
+ field) — the same `/analyse` contract auto-analyser routes to.
56
+
57
+ ## Python API
58
+
59
+ ```python
60
+ from conversation_analyser import ConversationAnalyser
61
+
62
+ result = ConversationAnalyser().analyse("transcript.txt", llm=True)
63
+ print(result.model_dump_json(indent=2))
64
+ ```
65
+
66
+ ## Input formats
67
+
68
+ A pluggable adapter registry tries, in order: structured adapters → heuristic
69
+ speaker markers → optional LLM segmentation → unsegmented fallback.
70
+
71
+ - **role/content** message list (OpenAI/Anthropic): `[{"role": "user", "content": "..."}, ...]`
72
+ - **AnythingLLM** rows: `[{"prompt": "...", "response": "...", "createdAt": ...}, ...]`
73
+ - **flat text** with speaker markers: `User:` / `Assistant:` / `Me:` / `ChatGPT:` /
74
+ `You said:` / `ChatGPT said:` / `Prompt:` / `Response:`
75
+ - anything else → LLM-segment (needs `[llm]`), else a single-blob fallback
76
+
77
+ `.pdf`/`.docx` inputs are text-extracted first (needs `pdfplumber`/`markitdown`,
78
+ or pre-extract with `document-analyser`).
79
+
80
+ ## The taxonomy
81
+
82
+ | Code | Meaning |
83
+ |---|---|
84
+ | `NQ` | New Query — opens a new topic |
85
+ | `FU` | Follow-up — clarification/elaboration |
86
+ | `CH` | Challenge — pushes back, tests, asks why |
87
+ | `EX` | Extension — applies/compares/synthesises in a new direction |
88
+ | `DG` | Delegation — task hand-off, no engagement |
89
+ | `AC` | Acknowledgement — thanks/confirmation |
90
+ | `MT` | Meta — about the conversation itself |
91
+
92
+ `critical_thinking = (CH+EX)/turns`, `delegation = DG/turns`, `filler = (AC+MT)/turns`.
93
+ Bands: One-Shot · Delegator · Directed · Iterative · Critical.
94
+
95
+ ## Graceful degradation
96
+
97
+ | Missing | Effect |
98
+ |---|---|
99
+ | `ANTHROPIC_API_KEY` / `[llm]` | `taxonomy`/`critical_thinking` null; analytics still produced; note `llm_unavailable` |
100
+ | `[embeddings]` | `prompt_self_similarity` null; note `embeddings_unavailable` |
101
+ | timestamps | temporal metrics omitted; no sub-session split; note `no timestamps` |
102
+
103
+ ## Output
104
+
105
+ `ConversationAnalysis` → an `aggregate` (rolled up over all human turns, the
106
+ headline) plus one `SessionAnalysis` per idle-gap sub-session, each with
107
+ `analytics`, `taxonomy`, `critical_thinking`, and per-turn `turns` (label +
108
+ rationale + preview). See the design spec §8 for the full schema.
109
+
110
+ ## Testing
111
+
112
+ ```bash
113
+ pytest # fast, deterministic (LLM mocked, no network)
114
+ pytest -m slow # includes sentence-transformers model download
115
+ pytest -m integration # includes live LLM calls
116
+ ```
@@ -0,0 +1,40 @@
1
+ [build-system]
2
+ requires = ["hatchling"]
3
+ build-backend = "hatchling.build"
4
+
5
+ [project]
6
+ name = "conversation-analyser"
7
+ version = "0.1.0"
8
+ description = "Critical-thinking and analytics for human-AI conversations — a member of the lens analyser family."
9
+ readme = "README.md"
10
+ requires-python = ">=3.11"
11
+ dependencies = [
12
+ "pydantic>=2.0",
13
+ "textstat>=0.7",
14
+ "vaderSentiment>=3.3",
15
+ "pyspellchecker>=0.8",
16
+ "fastapi>=0.109.0",
17
+ "uvicorn[standard]>=0.27.0",
18
+ "python-multipart>=0.0.9",
19
+ "rich>=13.7.0",
20
+ ]
21
+
22
+ [project.optional-dependencies]
23
+ embeddings = ["sentence-transformers>=2.2", "numpy>=1.24"]
24
+ llm = ["anthropic>=0.39"]
25
+ dev = ["pytest>=8.0", "httpx>=0.27.0"]
26
+
27
+ [project.scripts]
28
+ conversation-analyser = "conversation_analyser.cli:main"
29
+
30
+ [tool.hatch.build.targets.wheel]
31
+ packages = ["src/conversation_analyser"]
32
+
33
+ [tool.pytest.ini_options]
34
+ testpaths = ["tests"]
35
+ pythonpath = ["src"]
36
+ addopts = "-m 'not slow and not integration'"
37
+ markers = [
38
+ "slow: loads real models (sentence-transformers) or downloads weights — opt-in with -m slow",
39
+ "integration: makes live LLM calls — opt-in with -m integration",
40
+ ]
@@ -0,0 +1,16 @@
1
+ """conversation-analyser: critical-thinking + analytics for human-AI conversations.
2
+
3
+ Public API:
4
+
5
+ from conversation_analyser import ConversationAnalyser, ConversationAnalysis
6
+
7
+ result = ConversationAnalyser().analyse("transcript.txt")
8
+ print(result.model_dump_json(indent=2))
9
+ """
10
+ from __future__ import annotations
11
+
12
+ from .manifest import MANIFEST
13
+ from .models import ConversationAnalysis
14
+ from .pipeline import ConversationAnalyser
15
+
16
+ __all__ = ["ConversationAnalyser", "ConversationAnalysis", "MANIFEST"]
@@ -0,0 +1,127 @@
1
+ """Domain-neutral analytics tier (design spec §7).
2
+
3
+ All metrics derive deterministically from a session's turns. Optional/heavy
4
+ metrics (self-similarity) degrade to None when their dependency is absent; the
5
+ pipeline records a single note. Sentiment/readability/typo use the light core
6
+ deps and are wrapped defensively.
7
+ """
8
+ from __future__ import annotations
9
+
10
+ import re
11
+ from collections import Counter
12
+ from statistics import fmean
13
+
14
+ from . import embeddings
15
+ from .models import AnalyticsMetrics
16
+ from .parsers.base import ParsedTurn
17
+
18
+ # Pushback cue regex, ported verbatim from marking_pipeline/transcript.py.
19
+ _PUSHBACK_RE = re.compile(
20
+ r"\b(no,|actually|but\b|wait\b|are you sure|that's wrong|incorrect|"
21
+ r"i disagree|not right|you're wrong|why\b|why is|why does)\b",
22
+ re.IGNORECASE,
23
+ )
24
+ _WORD_RE = re.compile(r"\w+")
25
+
26
+
27
+ def _words(text: str) -> int:
28
+ return len(_WORD_RE.findall(text or ""))
29
+
30
+
31
+ def pushback_count(human_turns: list[ParsedTurn]) -> int:
32
+ return sum(len(_PUSHBACK_RE.findall(t.content)) for t in human_turns)
33
+
34
+
35
+ def _question_ratio(prompts: list[str]) -> float:
36
+ if not prompts:
37
+ return 0.0
38
+ asked = sum(1 for p in prompts if "?" in p)
39
+ return round(asked / len(prompts), 2)
40
+
41
+
42
+ def _flesch(text: str) -> float | None:
43
+ if not text.strip():
44
+ return None
45
+ try:
46
+ import textstat
47
+
48
+ return round(float(textstat.flesch_reading_ease(text)), 1)
49
+ except Exception:
50
+ return None
51
+
52
+
53
+ def _typo_rate(prompts: list[str]) -> float | None:
54
+ try:
55
+ from spellchecker import SpellChecker
56
+ except Exception:
57
+ return None
58
+ checker = SpellChecker()
59
+ rates: list[float] = []
60
+ for p in prompts:
61
+ words = [w.lower() for w in _WORD_RE.findall(p) if w.isalpha()]
62
+ if not words:
63
+ continue
64
+ unknown = checker.unknown(words)
65
+ rates.append(len(unknown) / len(words))
66
+ return round(fmean(rates), 3) if rates else None
67
+
68
+
69
+ def _sentiment(prompts: list[str]) -> tuple[float | None, float | None, float | None]:
70
+ if not prompts:
71
+ return None, None, None
72
+ try:
73
+ from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
74
+ except Exception:
75
+ return None, None, None
76
+ analyzer = SentimentIntensityAnalyzer()
77
+ start = round(analyzer.polarity_scores(prompts[0])["compound"], 3)
78
+ end = round(analyzer.polarity_scores(prompts[-1])["compound"], 3)
79
+ return start, end, round(end - start, 3)
80
+
81
+
82
+ def _temporal(turns: list[ParsedTurn]) -> tuple[float | None, int | None, int | None]:
83
+ stamps = [t.timestamp for t in turns if t.timestamp is not None]
84
+ if len(stamps) < 2:
85
+ return None, None, None
86
+ duration = (max(stamps) - min(stamps)).total_seconds() / 60.0
87
+ hour_mode = Counter(s.hour for s in stamps).most_common(1)[0][0]
88
+ weekday_mode = Counter(s.weekday() for s in stamps).most_common(1)[0][0]
89
+ return round(duration, 1), hour_mode, weekday_mode
90
+
91
+
92
def compute_analytics(turns: list[ParsedTurn], *, with_embeddings: bool = True) -> AnalyticsMetrics:
    """Build the analytics metrics for one list of turns.

    Turns are split by ``role`` into "human" (prompts) and "assistant"
    (responses). Word-count, question, pushback, readability, typo and
    sentiment metrics are computed from the prompts; temporal metrics use
    every turn. Prompt self-similarity is computed only when
    *with_embeddings* is true and ``embeddings.available()`` reports true,
    otherwise it is None.
    """
    human_turns = [turn for turn in turns if turn.role == "human"]
    assistant_turns = [turn for turn in turns if turn.role == "assistant"]
    prompts = [turn.content for turn in human_turns]

    prompt_lens = [_words(text) for text in prompts]
    response_lens = [_words(turn.content) for turn in assistant_turns]

    # The short-circuit keeps embeddings.available() uncalled when disabled.
    if with_embeddings and embeddings.available():
        similarity = embeddings.mean_self_similarity(prompts)
    else:
        similarity = None

    sentiment_start, sentiment_end, sentiment_delta = _sentiment(prompts)
    duration_min, hour_mode, weekday_mode = _temporal(turns)

    # Means and maxima fall back to zero when there is nothing to average.
    mean_prompt = round(fmean(prompt_lens), 2) if prompt_lens else 0.0
    longest_prompt = max(prompt_lens) if prompt_lens else 0
    mean_response = round(fmean(response_lens), 2) if response_lens else 0.0

    return AnalyticsMetrics(
        turn_count=len(turns),
        human_turn_count=len(human_turns),
        assistant_turn_count=len(assistant_turns),
        total_words=sum(prompt_lens) + sum(response_lens),
        mean_prompt_len=mean_prompt,
        max_prompt_len=longest_prompt,
        mean_response_len=mean_response,
        question_ratio=_question_ratio(prompts),
        pushback_count=pushback_count(human_turns),
        prompt_self_similarity=similarity,
        flesch_reading_ease=_flesch("\n\n".join(prompts)),
        mean_typo_rate=_typo_rate(prompts),
        sentiment_start=sentiment_start,
        sentiment_end=sentiment_end,
        sentiment_delta=sentiment_delta,
        duration_min=duration_min,
        hour_of_day_mode=hour_mode,
        weekday_mode=weekday_mode,
    )
@@ -0,0 +1,62 @@
1
+ """FastAPI app for conversation-analyser, mirroring the lens family contract.
2
+
3
+ Module-level `app` so the CLI can launch it with
4
+ `uvicorn.run("conversation_analyser.api:app", ...)` and tests can drive it with
5
+ fastapi.testclient.TestClient. Endpoints: GET /health, GET /manifest, POST /analyse (file upload).
6
+ """
7
+ from __future__ import annotations
8
+
9
+ import tempfile
10
+ import time
11
+ from importlib.metadata import version
12
+ from pathlib import Path
13
+
14
+ from fastapi import FastAPI, File, Form, HTTPException, UploadFile
15
+
16
+ from .manifest import MANIFEST
17
+ from .models import ConversationAnalysis
18
+ from .pipeline import ConversationAnalyser
19
+
20
# Captured once at import time; /health reports uptime relative to this.
_start_time = time.time()

# Module-level ASGI app; its version is read from the installed package metadata.
app = FastAPI(title="conversation-analyser", version=version("conversation-analyser"))

# Single analyser instance reused by the /analyse endpoint for every request.
_analyser = ConversationAnalyser()
25
+
26
+
27
@app.get("/health")
def health() -> dict:
    """Report service status, seconds since module import, and package version."""
    uptime_s = round(time.time() - _start_time, 1)
    pkg_version = version("conversation-analyser")
    return {"status": "ok", "uptime": uptime_s, "version": pkg_version}
34
+
35
+
36
@app.get("/manifest")
def manifest() -> dict:
    """Return the package's ``MANIFEST`` object unchanged."""
    return MANIFEST
39
+
40
+
41
@app.post("/analyse", response_model=ConversationAnalysis)
async def analyse(
    file: UploadFile = File(...),
    llm: bool = Form(False),
) -> ConversationAnalysis:
    """Analyse an uploaded conversation file.

    The upload is written to a temporary file (keeping the original suffix,
    defaulting to ``.txt``) and passed to the shared analyser.

    Args:
        file: The uploaded conversation file.
        llm: Forwarded to the analyser as its ``llm`` flag.

    Raises:
        HTTPException: 422 for an empty upload or a ``ValueError`` from the
            analyser; 500 for any other failure.
    """
    from fastapi.concurrency import run_in_threadpool

    content = await file.read()
    if not content:
        raise HTTPException(status_code=422, detail="Empty file")

    suffix = Path(file.filename or "upload.txt").suffix or ".txt"
    tmp_path: Path | None = None
    try:
        # Created inside the try so a failed write still reaches the cleanup.
        with tempfile.NamedTemporaryFile(suffix=suffix, delete=False) as tmp:
            tmp_path = Path(tmp.name)
            tmp.write(content)

        # The analyser is synchronous; calling it directly here would block
        # the event loop (and every other request) for the whole analysis.
        # NOTE(review): requests may now run concurrently on the shared
        # analyser instance — confirm ConversationAnalyser.analyse is safe
        # to call from multiple threads.
        return await run_in_threadpool(
            _analyser.analyse,
            tmp_path,
            llm=llm,
            input_label=file.filename or "<upload>",
        )
    except ValueError as e:
        raise HTTPException(status_code=422, detail=str(e)) from e
    except Exception as e:  # noqa: BLE001
        raise HTTPException(status_code=500, detail=str(e)) from e
    finally:
        if tmp_path is not None:
            tmp_path.unlink(missing_ok=True)
@@ -0,0 +1,129 @@
1
+ """CLI entry point following the lens family pattern.
2
+
3
+ conversation-analyser <path> [--json] [--llm] [...] # analyse (default)
4
+ conversation-analyser serve [--host H] [--port P] # run the HTTP API
5
+
6
+ Human-readable summary by default; `--json` emits the full ConversationAnalysis
7
+ to stdout (this is what auto-analyser consumes). Diagnostics go to stderr. A `manifest` subcommand prints the analyser manifest as JSON.
8
+ """
9
+ from __future__ import annotations
10
+
11
+ import argparse
12
+ import json
13
+ import os
14
+ import sys
15
+ from pathlib import Path
16
+
17
+ from .config import DEFAULT_PORT, IDLE_GAP_MIN
18
+ from .manifest import MANIFEST
19
+ from .models import ConversationAnalysis
20
+ from .pipeline import ConversationAnalyser
21
+
22
+
23
def _build_parser() -> argparse.ArgumentParser:
    """Construct the argument parser for the default (analyse) command."""
    parser = argparse.ArgumentParser(
        prog="conversation-analyser",
        description="Analyse a human-AI conversation: analytics + critical-thinking taxonomy.",
    )
    parser.add_argument("file", type=Path, help="conversation file (.json/.txt/.md/.pdf)")
    parser.add_argument("--json", action="store_true", dest="as_json", help="JSON output")
    parser.add_argument(
        "--llm",
        action="store_true",
        help="add the taxonomy/critical-thinking tier (needs [llm] + ANTHROPIC_API_KEY)",
    )
    parser.add_argument("--no-embeddings", action="store_true", help="skip prompt self-similarity")
    parser.add_argument(
        "--parse-mode",
        choices=("auto", "structured", "heuristic", "llm-segment"),
        default="auto",
    )
    parser.add_argument(
        "--idle-gap", type=float, default=IDLE_GAP_MIN, help="sub-session split (minutes)"
    )
    return parser


def main() -> None:
    """Entry point: dispatch ``serve`` / ``manifest``, otherwise analyse a file.

    Exits with status 1 (message on stderr) when the file does not exist or
    the analysis raises. Prints JSON with ``--json``, else a human summary.
    """
    cli_args = sys.argv[1:]
    subcommand = cli_args[0] if cli_args else None

    # Subcommands are matched on the first argument before argparse runs.
    if subcommand == "serve":
        _serve(cli_args[1:])
        return
    if subcommand == "manifest":
        print(json.dumps(MANIFEST, indent=2))
        return

    args = _build_parser().parse_args()

    if not args.file.exists():
        print(f"Error: file not found: {args.file}", file=sys.stderr)
        sys.exit(1)

    try:
        analyser = ConversationAnalyser(idle_gap_min=args.idle_gap)
        result = analyser.analyse(
            args.file,
            llm=args.llm,
            with_embeddings=not args.no_embeddings,
            parse_mode=args.parse_mode,
        )
    except ValueError as e:
        print(f"Error: {e}", file=sys.stderr)
        sys.exit(1)
    except Exception as e:  # noqa: BLE001
        print(f"Analysis failed: {e}", file=sys.stderr)
        sys.exit(1)

    if not args.as_json:
        _print_human(result)
        return

    print(result.model_dump_json(indent=2))
71
+
72
+
73
def _print_human(result: ConversationAnalysis) -> None:
    """Print a human-readable summary of *result* to stdout using Rich.

    Shows the input/format header and the aggregate analytics line. When the
    aggregate carries both ``critical_thinking`` and ``taxonomy``, the score,
    band, longest engaged chain and a label-count table follow; otherwise a
    hint to run with ``--llm`` is printed. Any notes are printed last.
    """
    from rich.console import Console
    from rich.markup import escape
    from rich.table import Table

    console = Console(file=sys.stdout)
    agg = result.aggregate
    a = agg.analytics

    # Values derived from the input (file name, detected format, notes) are
    # escaped so square brackets in them print literally instead of being
    # parsed as Rich markup tags, which can drop text or raise MarkupError.
    console.print(
        f"[bold]Input:[/bold] {escape(str(result.input))} "
        f"[bold]Format:[/bold] {escape(str(result.format_detected))} ({result.parse_mode}) "
        f"[bold]Sessions:[/bold] {result.session_count} "
        f"[bold]LLM:[/bold] {'yes' if result.llm_used else 'no'}"
    )
    console.print(
        f"[bold]Turns:[/bold] {a.turn_count} "
        f"(human {a.human_turn_count}, ai {a.assistant_turn_count}) "
        f"[bold]Words:[/bold] {a.total_words} "
        f"[bold]Questions:[/bold] {a.question_ratio:.0%} "
        f"[bold]Pushback:[/bold] {a.pushback_count}"
    )

    if agg.critical_thinking is not None and agg.taxonomy is not None:
        ct = agg.critical_thinking
        console.print(
            f"[bold]Critical-thinking score:[/bold] {ct.score:.0f}/100 "
            f"[bold]Band:[/bold] {ct.band} "
            f"[bold]Longest engaged chain:[/bold] {agg.taxonomy.longest_engaged_chain}"
        )
        # One column per taxonomy code, with a single row of counts.
        table = Table(show_header=True, header_style="bold")
        for code in agg.taxonomy.label_counts:
            table.add_column(code, justify="right")
        table.add_row(*[str(v) for v in agg.taxonomy.label_counts.values()])
        console.print(table)
    else:
        console.print("[dim]Critical-thinking tier skipped (run with --llm).[/dim]")

    if result.notes:
        console.print(f"[dim]Notes: {escape(', '.join(result.notes))}[/dim]")
112
+
113
+
114
def _serve(argv: list[str]) -> None:
    """Parse ``serve`` options from *argv* and run the HTTP API under uvicorn.

    ``--port`` and ``--host`` default to the ``CONVERSATION_ANALYSER_PORT``
    and ``CONVERSATION_ANALYSER_HOST`` environment variables, falling back to
    ``DEFAULT_PORT`` and ``127.0.0.1``.
    """
    import uvicorn

    parser = argparse.ArgumentParser(prog="conversation-analyser serve")

    env_port = os.getenv("CONVERSATION_ANALYSER_PORT", str(DEFAULT_PORT))
    parser.add_argument("--port", type=int, default=int(env_port))

    env_host = os.getenv("CONVERSATION_ANALYSER_HOST", "127.0.0.1")
    parser.add_argument("--host", default=env_host)

    opts = parser.parse_args(argv)
    uvicorn.run("conversation_analyser.api:app", host=opts.host, port=opts.port)
126
+
127
+
128
+ if __name__ == "__main__":
129
+ main()