reflection-analyser 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- reflection_analyser-0.1.0/.gitignore +10 -0
- reflection_analyser-0.1.0/LICENSE +21 -0
- reflection_analyser-0.1.0/PKG-INFO +143 -0
- reflection_analyser-0.1.0/README.md +112 -0
- reflection_analyser-0.1.0/pyproject.toml +57 -0
- reflection_analyser-0.1.0/src/reflection_analyser/__init__.py +14 -0
- reflection_analyser-0.1.0/src/reflection_analyser/analyser.py +123 -0
- reflection_analyser-0.1.0/src/reflection_analyser/api.py +64 -0
- reflection_analyser-0.1.0/src/reflection_analyser/cli.py +88 -0
- reflection_analyser-0.1.0/src/reflection_analyser/exceptions.py +2 -0
- reflection_analyser-0.1.0/src/reflection_analyser/lexicon.py +165 -0
- reflection_analyser-0.1.0/src/reflection_analyser/manifest.py +15 -0
- reflection_analyser-0.1.0/src/reflection_analyser/schemas.py +50 -0
- reflection_analyser-0.1.0/tests/__init__.py +0 -0
- reflection_analyser-0.1.0/tests/test_analyser.py +130 -0
- reflection_analyser-0.1.0/tests/test_api.py +54 -0
- reflection_analyser-0.1.0/tests/test_cli.py +45 -0
- reflection_analyser-0.1.0/tests/test_lexicon.py +64 -0
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Michael Borck
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,143 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: reflection-analyser
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Reflective-writing analysis — metacognition, criticality, depth (Moon-style bands)
|
|
5
|
+
Project-URL: Homepage, https://github.com/michael-borck/reflection-analyser
|
|
6
|
+
Project-URL: Repository, https://github.com/michael-borck/reflection-analyser
|
|
7
|
+
Project-URL: Issues, https://github.com/michael-borck/reflection-analyser/issues
|
|
8
|
+
Author-email: Michael Borck <michael.borck@curtin.edu.au>
|
|
9
|
+
License: MIT
|
|
10
|
+
License-File: LICENSE
|
|
11
|
+
Keywords: assessment,education,lens,metacognition,reflection,udl,writing
|
|
12
|
+
Classifier: Development Status :: 3 - Alpha
|
|
13
|
+
Classifier: Intended Audience :: Developers
|
|
14
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
15
|
+
Classifier: Programming Language :: Python :: 3
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
18
|
+
Requires-Python: >=3.11
|
|
19
|
+
Requires-Dist: fastapi>=0.109.0
|
|
20
|
+
Requires-Dist: lens-contract>=0.2.0
|
|
21
|
+
Requires-Dist: python-multipart>=0.0.9
|
|
22
|
+
Requires-Dist: rich>=13.7.0
|
|
23
|
+
Requires-Dist: uvicorn[standard]>=0.27.0
|
|
24
|
+
Provides-Extra: dev
|
|
25
|
+
Requires-Dist: httpx>=0.27.0; extra == 'dev'
|
|
26
|
+
Requires-Dist: pytest-cov>=4.0.0; extra == 'dev'
|
|
27
|
+
Requires-Dist: pytest>=8.0.0; extra == 'dev'
|
|
28
|
+
Provides-Extra: documents
|
|
29
|
+
Requires-Dist: document-analyser>=0.6.0; extra == 'documents'
|
|
30
|
+
Description-Content-Type: text/markdown
|
|
31
|
+
|
|
32
|
+
# reflection-analyser
|
|
33
|
+
|
|
34
|
+
**Reflective-writing analysis** — the [lens-family](https://github.com/michael-borck/lens-analysers)
|
|
35
|
+
member that reads a learning journal / reflection / portfolio entry as **reflection**, not just
|
|
36
|
+
as prose.
|
|
37
|
+
|
|
38
|
+
> `document-analyser` reads readability; this reads *reflective depth*. Different signals from
|
|
39
|
+
> the same words. **Explicit-only** (`auto_routable: false`) — same pattern as
|
|
40
|
+
> `conversation-analyser`: text and prose extensions auto-route to `document-analyser`; invoke
|
|
41
|
+
> `reflection-analyser` deliberately when you want the reflective-depth interpretation.
|
|
42
|
+
|
|
43
|
+
Built around the markers commonly used in reflective-writing rubrics
|
|
44
|
+
([Moon's depth scale](https://www.tandfonline.com/doi/abs/10.1080/0307507990240207),
|
|
45
|
+
Gibbs' reflective cycle, the SOLO taxonomy): metacognition, criticality, evidence linkage,
|
|
46
|
+
affect language, and forward-looking action.
|
|
47
|
+
|
|
48
|
+
## Install
|
|
49
|
+
|
|
50
|
+
```bash
|
|
51
|
+
pip install reflection-analyser
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
Optional: read `.pdf` / `.docx` / `.pptx` journals (otherwise plain-text / `.md` only):
|
|
55
|
+
|
|
56
|
+
```bash
|
|
57
|
+
pip install 'reflection-analyser[documents]'
|
|
58
|
+
```
|
|
59
|
+
|
|
60
|
+
## Use
|
|
61
|
+
|
|
62
|
+
**Python:**
|
|
63
|
+
|
|
64
|
+
```python
|
|
65
|
+
from reflection_analyser import ReflectionAnalyser
|
|
66
|
+
|
|
67
|
+
# From text directly
|
|
68
|
+
result = ReflectionAnalyser().analyse_text("Looking back, I realised that…")
|
|
69
|
+
|
|
70
|
+
# From a file (composes on document-analyser for binary docs when [documents] is installed)
|
|
71
|
+
result = ReflectionAnalyser().analyse("journal.md")
|
|
72
|
+
result = ReflectionAnalyser().analyse("journal.docx") # requires [documents]
|
|
73
|
+
|
|
74
|
+
print(result.depth_band) # "critical"
|
|
75
|
+
print(result.composite_depth_score) # 0.62
|
|
76
|
+
print(result.metacognition.count) # 7
|
|
77
|
+
print(result.criticality.count) # 3
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
**CLI:**
|
|
81
|
+
|
|
82
|
+
```bash
|
|
83
|
+
reflection-analyser journal.md
|
|
84
|
+
reflection-analyser journal.txt --json
|
|
85
|
+
reflection-analyser journal.docx # needs [documents] extra
|
|
86
|
+
echo "Looking back…" | reflection-analyser -
|
|
87
|
+
reflection-analyser serve
|
|
88
|
+
reflection-analyser manifest
|
|
89
|
+
```
|
|
90
|
+
|
|
91
|
+
**HTTP** (`reflection-analyser serve` on port 8015):
|
|
92
|
+
|
|
93
|
+
```bash
|
|
94
|
+
curl -F file=@journal.md http://localhost:8015/analyse
|
|
95
|
+
```
|
|
96
|
+
|
|
97
|
+
## Signals
|
|
98
|
+
|
|
99
|
+
For a piece of reflective writing:
|
|
100
|
+
|
|
101
|
+
- **Metacognition** — first-person + cognitive verbs (`I realised`, `I noticed`, `looking back`,
|
|
102
|
+
`on reflection`). Surface depth indicator.
|
|
103
|
+
- **Criticality** — contrast/qualification phrases (`however`, `in contrast`, `that said`,
|
|
104
|
+
`on the other hand`). Marker of dialogic vs descriptive reflection.
|
|
105
|
+
- **Evidence** — references to specific sources (attributions such as `according to …`, quotes, page numbers) —
|
|
106
|
+
citation density. Concrete vs abstract.
|
|
107
|
+
- **Affect** — emotion words (`frustrated`, `surprised`, `confident`, `uncertain`). Too few =
|
|
108
|
+
clinical; presence indicates engagement.
|
|
109
|
+
- **Action / forward-looking** — `next time`, `going forward`, `I will`, future-tense intent.
|
|
110
|
+
Marker of transformative reflection.
|
|
111
|
+
|
|
112
|
+
**Composite depth score** (0–1) combines per-marker coverages; mapped to a Moon-style band:
|
|
113
|
+
|
|
114
|
+
| Band | Score | Description |
|
|
115
|
+
|---|---|---|
|
|
116
|
+
| descriptive | 0.0–0.25 | "What happened" only — events recounted, little interpretation |
|
|
117
|
+
| dialogic | 0.25–0.5 | Some self-questioning + critical thought |
|
|
118
|
+
| critical | 0.5–0.75 | Multiple perspectives, evidence linkage, qualification |
|
|
119
|
+
| transformative | 0.75–1.0 | Forward-looking insight, evidence-linked, change-oriented |
|
|
120
|
+
|
|
121
|
+
The score is a **signal, not a grade** — it's meant to inform human judgement, not replace it.
|
|
122
|
+
|
|
123
|
+
## The family
|
|
124
|
+
|
|
125
|
+
| What you want | Use |
|
|
126
|
+
|---|---|
|
|
127
|
+
| Document text + readability | **document-analyser** |
|
|
128
|
+
| Reflective depth on that text | **reflection-analyser** (this) |
|
|
129
|
+
| Human-AI conversation analysis | **conversation-analyser** |
|
|
130
|
+
| Any file → right engine | **auto-analyser** |
|
|
131
|
+
|
|
132
|
+
## Limits
|
|
133
|
+
|
|
134
|
+
- Lexicon-based v1 — fast, transparent, but catches phrasing not meaning. A reflective sentence
|
|
135
|
+
without our trigger words is under-scored; a non-reflective sentence containing `I realised` is over-scored.
|
|
136
|
+
- English-only for v1.
|
|
137
|
+
- Calibrated against generic reflective-writing rubrics; tune the band thresholds in
|
|
138
|
+
`_BAND_THRESHOLDS` for your unit's specific rubric if needed.
|
|
139
|
+
- Vision/LLM-augmented depth scoring is a possible follow-on; not in v1.
|
|
140
|
+
|
|
141
|
+
## License
|
|
142
|
+
|
|
143
|
+
MIT
|
|
@@ -0,0 +1,112 @@
|
|
|
1
|
+
# reflection-analyser
|
|
2
|
+
|
|
3
|
+
**Reflective-writing analysis** — the [lens-family](https://github.com/michael-borck/lens-analysers)
|
|
4
|
+
member that reads a learning journal / reflection / portfolio entry as **reflection**, not just
|
|
5
|
+
as prose.
|
|
6
|
+
|
|
7
|
+
> `document-analyser` reads readability; this reads *reflective depth*. Different signals from
|
|
8
|
+
> the same words. **Explicit-only** (`auto_routable: false`) — same pattern as
|
|
9
|
+
> `conversation-analyser`: text and prose extensions auto-route to `document-analyser`; invoke
|
|
10
|
+
> `reflection-analyser` deliberately when you want the reflective-depth interpretation.
|
|
11
|
+
|
|
12
|
+
Built around the markers commonly used in reflective-writing rubrics
|
|
13
|
+
([Moon's depth scale](https://www.tandfonline.com/doi/abs/10.1080/0307507990240207),
|
|
14
|
+
Gibbs' reflective cycle, the SOLO taxonomy): metacognition, criticality, evidence linkage,
|
|
15
|
+
affect language, and forward-looking action.
|
|
16
|
+
|
|
17
|
+
## Install
|
|
18
|
+
|
|
19
|
+
```bash
|
|
20
|
+
pip install reflection-analyser
|
|
21
|
+
```
|
|
22
|
+
|
|
23
|
+
Optional: read `.pdf` / `.docx` / `.pptx` journals (otherwise plain-text / `.md` only):
|
|
24
|
+
|
|
25
|
+
```bash
|
|
26
|
+
pip install 'reflection-analyser[documents]'
|
|
27
|
+
```
|
|
28
|
+
|
|
29
|
+
## Use
|
|
30
|
+
|
|
31
|
+
**Python:**
|
|
32
|
+
|
|
33
|
+
```python
|
|
34
|
+
from reflection_analyser import ReflectionAnalyser
|
|
35
|
+
|
|
36
|
+
# From text directly
|
|
37
|
+
result = ReflectionAnalyser().analyse_text("Looking back, I realised that…")
|
|
38
|
+
|
|
39
|
+
# From a file (composes on document-analyser for binary docs when [documents] is installed)
|
|
40
|
+
result = ReflectionAnalyser().analyse("journal.md")
|
|
41
|
+
result = ReflectionAnalyser().analyse("journal.docx") # requires [documents]
|
|
42
|
+
|
|
43
|
+
print(result.depth_band) # "critical"
|
|
44
|
+
print(result.composite_depth_score) # 0.62
|
|
45
|
+
print(result.metacognition.count) # 7
|
|
46
|
+
print(result.criticality.count) # 3
|
|
47
|
+
```
|
|
48
|
+
|
|
49
|
+
**CLI:**
|
|
50
|
+
|
|
51
|
+
```bash
|
|
52
|
+
reflection-analyser journal.md
|
|
53
|
+
reflection-analyser journal.txt --json
|
|
54
|
+
reflection-analyser journal.docx # needs [documents] extra
|
|
55
|
+
echo "Looking back…" | reflection-analyser -
|
|
56
|
+
reflection-analyser serve
|
|
57
|
+
reflection-analyser manifest
|
|
58
|
+
```
|
|
59
|
+
|
|
60
|
+
**HTTP** (`reflection-analyser serve` on port 8015):
|
|
61
|
+
|
|
62
|
+
```bash
|
|
63
|
+
curl -F file=@journal.md http://localhost:8015/analyse
|
|
64
|
+
```
|
|
65
|
+
|
|
66
|
+
## Signals
|
|
67
|
+
|
|
68
|
+
For a piece of reflective writing:
|
|
69
|
+
|
|
70
|
+
- **Metacognition** — first-person + cognitive verbs (`I realised`, `I noticed`, `looking back`,
|
|
71
|
+
`on reflection`). Surface depth indicator.
|
|
72
|
+
- **Criticality** — contrast/qualification phrases (`however`, `in contrast`, `that said`,
|
|
73
|
+
`on the other hand`). Marker of dialogic vs descriptive reflection.
|
|
74
|
+
- **Evidence** — references to specific sources (attributions such as `according to …`, quotes, page numbers) —
|
|
75
|
+
citation density. Concrete vs abstract.
|
|
76
|
+
- **Affect** — emotion words (`frustrated`, `surprised`, `confident`, `uncertain`). Too few =
|
|
77
|
+
clinical; presence indicates engagement.
|
|
78
|
+
- **Action / forward-looking** — `next time`, `going forward`, `I will`, future-tense intent.
|
|
79
|
+
Marker of transformative reflection.
|
|
80
|
+
|
|
81
|
+
**Composite depth score** (0–1) combines per-marker coverages; mapped to a Moon-style band:
|
|
82
|
+
|
|
83
|
+
| Band | Score | Description |
|
|
84
|
+
|---|---|---|
|
|
85
|
+
| descriptive | 0.0–0.25 | "What happened" only — events recounted, little interpretation |
|
|
86
|
+
| dialogic | 0.25–0.5 | Some self-questioning + critical thought |
|
|
87
|
+
| critical | 0.5–0.75 | Multiple perspectives, evidence linkage, qualification |
|
|
88
|
+
| transformative | 0.75–1.0 | Forward-looking insight, evidence-linked, change-oriented |
|
|
89
|
+
|
|
90
|
+
The score is a **signal, not a grade** — it's meant to inform human judgement, not replace it.
|
|
91
|
+
|
|
92
|
+
## The family
|
|
93
|
+
|
|
94
|
+
| What you want | Use |
|
|
95
|
+
|---|---|
|
|
96
|
+
| Document text + readability | **document-analyser** |
|
|
97
|
+
| Reflective depth on that text | **reflection-analyser** (this) |
|
|
98
|
+
| Human-AI conversation analysis | **conversation-analyser** |
|
|
99
|
+
| Any file → right engine | **auto-analyser** |
|
|
100
|
+
|
|
101
|
+
## Limits
|
|
102
|
+
|
|
103
|
+
- Lexicon-based v1 — fast, transparent, but catches phrasing not meaning. A reflective sentence
|
|
104
|
+
without our trigger words is under-scored; a non-reflective sentence containing `I realised` is over-scored.
|
|
105
|
+
- English-only for v1.
|
|
106
|
+
- Calibrated against generic reflective-writing rubrics; tune the band thresholds in
|
|
107
|
+
`_BAND_THRESHOLDS` for your unit's specific rubric if needed.
|
|
108
|
+
- Vision/LLM-augmented depth scoring is a possible follow-on; not in v1.
|
|
109
|
+
|
|
110
|
+
## License
|
|
111
|
+
|
|
112
|
+
MIT
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["hatchling"]
|
|
3
|
+
build-backend = "hatchling.build"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "reflection-analyser"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "Reflective-writing analysis — metacognition, criticality, depth (Moon-style bands)"
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
requires-python = ">=3.11"
|
|
11
|
+
license = { text = "MIT" }
|
|
12
|
+
authors = [{ name = "Michael Borck", email = "michael.borck@curtin.edu.au" }]
|
|
13
|
+
keywords = ["reflection", "writing", "metacognition", "education", "assessment", "udl", "lens"]
|
|
14
|
+
classifiers = [
|
|
15
|
+
"Development Status :: 3 - Alpha",
|
|
16
|
+
"Intended Audience :: Developers",
|
|
17
|
+
"License :: OSI Approved :: MIT License",
|
|
18
|
+
"Programming Language :: Python :: 3",
|
|
19
|
+
"Programming Language :: Python :: 3.11",
|
|
20
|
+
"Programming Language :: Python :: 3.12",
|
|
21
|
+
]
|
|
22
|
+
dependencies = [
|
|
23
|
+
"lens-contract>=0.2.0",
|
|
24
|
+
"fastapi>=0.109.0",
|
|
25
|
+
"uvicorn[standard]>=0.27.0",
|
|
26
|
+
"python-multipart>=0.0.9",
|
|
27
|
+
"rich>=13.7.0",
|
|
28
|
+
]
|
|
29
|
+
|
|
30
|
+
[project.optional-dependencies]
|
|
31
|
+
# document-analyser brings pdfplumber + markitdown for the binary-document path.
|
|
32
|
+
# Without this extra, reflection-analyser still works on plain-text / markdown.
|
|
33
|
+
documents = [
|
|
34
|
+
"document-analyser>=0.6.0",
|
|
35
|
+
]
|
|
36
|
+
dev = [
|
|
37
|
+
"pytest>=8.0.0",
|
|
38
|
+
"pytest-cov>=4.0.0",
|
|
39
|
+
"httpx>=0.27.0",
|
|
40
|
+
]
|
|
41
|
+
|
|
42
|
+
[tool.uv.sources]
|
|
43
|
+
lens-contract = { path = "../lens-contract", editable = true }
|
|
44
|
+
|
|
45
|
+
[project.scripts]
|
|
46
|
+
reflection-analyser = "reflection_analyser.cli:main"
|
|
47
|
+
|
|
48
|
+
[project.urls]
|
|
49
|
+
Homepage = "https://github.com/michael-borck/reflection-analyser"
|
|
50
|
+
Repository = "https://github.com/michael-borck/reflection-analyser"
|
|
51
|
+
Issues = "https://github.com/michael-borck/reflection-analyser/issues"
|
|
52
|
+
|
|
53
|
+
[tool.hatch.build.targets.wheel]
|
|
54
|
+
packages = ["src/reflection_analyser"]
|
|
55
|
+
|
|
56
|
+
[tool.pytest.ini_options]
|
|
57
|
+
testpaths = ["tests"]
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
"""reflection-analyser — reflective-writing depth analysis for the lens family."""
|
|
2
|
+
from .analyser import ReflectionAnalyser
|
|
3
|
+
from .exceptions import ReflectionAnalyserError
|
|
4
|
+
from .schemas import (
|
|
5
|
+
MarkerSignal,
|
|
6
|
+
ReflectionAnalysis,
|
|
7
|
+
)
|
|
8
|
+
|
|
9
|
+
__all__ = [
|
|
10
|
+
"ReflectionAnalyser",
|
|
11
|
+
"ReflectionAnalyserError",
|
|
12
|
+
"ReflectionAnalysis",
|
|
13
|
+
"MarkerSignal",
|
|
14
|
+
]
|
|
@@ -0,0 +1,123 @@
|
|
|
1
|
+
"""Core reflection analyser — text → markers → composite depth → Moon-style band."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
import re
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
|
|
7
|
+
from .exceptions import ReflectionAnalyserError
|
|
8
|
+
from .lexicon import lexicons
|
|
9
|
+
from .schemas import MarkerSignal, ReflectionAnalysis
|
|
10
|
+
|
|
11
|
+
# Weight of each marker family in the composite depth score. Tuned so that
# metacognition + criticality + evidence dominate (0.75 combined), with affect
# and forward-looking acting as supporting indicators. The weights sum to 1.0,
# so the composite stays within [0, 1] once each coverage is normalised.
_WEIGHTS = {
    "metacognition": 0.30,
    "criticality": 0.25,
    "evidence": 0.20,
    "affect": 0.10,
    "forward": 0.15,
}

# Coverage (hits per 100 words) at which a marker family earns its full
# weight. Anything above this is clamped, so a passage stuffed with 30
# 'however's cannot push one family's contribution past its weight.
_COVERAGE_CAP_PER_100W = 2.0

# Moon-style band thresholds as (inclusive lower bound, band name), ordered
# from highest to lowest so the first bound the score reaches decides the
# band. Tuneable per rubric.
_BAND_THRESHOLDS = [
    (0.75, "transformative"),
    (0.50, "critical"),
    (0.25, "dialogic"),
    (0.00, "descriptive"),
]

# Sentence boundary: a run of whitespace directly after '.', '!' or '?'.
# The whitespace itself is consumed by the split.
_SENTENCE_SPLIT = re.compile(r"(?<=[.!?])\s+")
# A word is a maximal run of \w characters.
_WORD_SPLIT = re.compile(r"\b\w+\b")

# Suffixes that are read directly as UTF-8 text; "" covers files that have
# no extension at all.
_TEXT_SUFFIXES = {".txt", ".md", ".markdown", ".text", ".rst", ".qmd", ""}
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
class ReflectionAnalyser:
    """Score the reflective depth of a piece of writing.

    The analyser is stateless: each call counts lexicon hits per marker
    family, converts them to a coverage per 100 words, and folds the
    coverages into a composite score and a named depth band.
    """

    def analyse_text(self, text: str, *, source_kind: str = "text") -> ReflectionAnalysis:
        """Analyse raw text and return its marker signals, score and band.

        Args:
            text: The reflection to analyse.
            source_kind: Free-form label recorded on the result to say where
                the text came from (e.g. ``"text"``, ``"stdin"``, ``"file:md"``).

        Raises:
            ReflectionAnalyserError: If ``text`` is empty or whitespace only.
        """
        if not text or not text.strip():
            raise ReflectionAnalyserError("Empty input — nothing to analyse")

        words = _WORD_SPLIT.findall(text)
        word_count = len(words)
        sentences = [s for s in _SENTENCE_SPLIT.split(text) if s.strip()]

        lex = lexicons()
        signals: dict[str, MarkerSignal] = {}
        for name, compiled in lex.items():
            count, samples = compiled.find_hits(text, sentences)
            # Text made only of punctuation has zero words; report zero
            # coverage rather than dividing by zero.
            coverage = (count / word_count * 100) if word_count else 0.0
            signals[name] = MarkerSignal(
                count=count,
                coverage_per_100_words=round(coverage, 4),
                examples=samples,
            )

        composite = _compute_depth(signals)
        band = _band_for(composite)

        # The lexicon registry must provide exactly these five family keys.
        return ReflectionAnalysis(
            word_count=word_count,
            sentence_count=len(sentences),
            metacognition=signals["metacognition"],
            criticality=signals["criticality"],
            evidence=signals["evidence"],
            affect=signals["affect"],
            forward_looking=signals["forward"],
            composite_depth_score=round(composite, 4),
            depth_band=band,
            source_kind=source_kind,
        )

    def analyse(self, path: str | Path) -> ReflectionAnalysis:
        """Read a file and analyse its text.

        Files whose suffix is in ``_TEXT_SUFFIXES`` are read directly as UTF-8
        (undecodable bytes are replaced). Any other suffix is handed to
        ``document_analyser.extraction.extract_text``, which is only available
        when the ``[documents]`` extra is installed.

        Args:
            path: Location of the file to analyse.

        Raises:
            ReflectionAnalyserError: If the path does not exist, is a
                directory, cannot be read, needs the missing ``[documents]``
                extra, fails text extraction, or yields empty text.
        """
        p = Path(path)
        if not p.exists():
            raise ReflectionAnalyserError(f"File not found: {p}")
        # A directory passes exists(); reject it here so callers get the
        # package's own error type instead of a raw IsADirectoryError.
        if p.is_dir():
            raise ReflectionAnalyserError(f"Not a file (is a directory): {p}")

        suffix = p.suffix.lower()
        if suffix in _TEXT_SUFFIXES:
            try:
                text = p.read_text(encoding="utf-8", errors="replace")
            except OSError as e:
                # e.g. permission denied — surface it as an analyser error.
                raise ReflectionAnalyserError(f"Could not read {p}: {e}") from e
            return self.analyse_text(text, source_kind=f"file:{suffix.lstrip('.') or 'text'}")

        # Binary path → compose with document-analyser if available.
        try:
            from document_analyser.extraction import extract_text
        except ImportError as e:
            raise ReflectionAnalyserError(
                f"Reading {suffix} requires the [documents] extra "
                f"(pip install 'reflection-analyser[documents]'): {e}"
            ) from e

        try:
            text = extract_text(p)
        except Exception as e:
            raise ReflectionAnalyserError(f"document-analyser could not extract text from {p}: {e}") from e

        return self.analyse_text(text, source_kind=f"file:{suffix.lstrip('.')}")
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
def _compute_depth(signals: dict[str, MarkerSignal]) -> float:
    """Fold per-family coverages into one weighted depth score in [0, 1].

    Each family's coverage is scaled against ``_COVERAGE_CAP_PER_100W`` and
    clamped at 1.0 before its weight is applied. Families absent from
    ``signals`` contribute nothing.
    """
    total = 0.0
    for family, weight in _WEIGHTS.items():
        signal = signals.get(family)
        if signal is not None:
            # Fraction of the cap reached, never more than the full share.
            share = min(signal.coverage_per_100_words / _COVERAGE_CAP_PER_100W, 1.0)
            total += weight * share
    return max(0.0, min(total, 1.0))
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
def _band_for(composite: float) -> str:
    """Return the name of the first band whose lower bound ``composite`` reaches.

    ``_BAND_THRESHOLDS`` is scanned in its listed (highest-first) order; a
    score that reaches no bound at all falls back to ``"descriptive"``.
    """
    return next(
        (label for floor, label in _BAND_THRESHOLDS if composite >= floor),
        "descriptive",
    )
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
"""FastAPI service — reflection-analyser."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
from typing import Any
|
|
5
|
+
|
|
6
|
+
from fastapi import FastAPI, File, Form, HTTPException, UploadFile
|
|
7
|
+
from lens_contract import add_contract_routes, add_cors, upload_tempfile
|
|
8
|
+
|
|
9
|
+
from .analyser import ReflectionAnalyser
|
|
10
|
+
from .exceptions import ReflectionAnalyserError
|
|
11
|
+
from .manifest import MANIFEST
|
|
12
|
+
from .schemas import ReflectionAnalysis
|
|
13
|
+
|
|
14
|
+
# One analyser instance, created at import time and shared by all requests.
_lens = ReflectionAnalyser()

# The ASGI application; the reported version comes from the package manifest.
app = FastAPI(
    title="reflection-analyser",
    description="Reflective-writing analysis — metacognition, criticality, depth (Moon-style bands)",
    version=MANIFEST["version"],
    docs_url="/docs",
    redoc_url="/redoc",
)

# Shared lens-family wiring from lens_contract.
# NOTE(review): presumably this registers the /health and /manifest routes
# that root() advertises — confirm against lens_contract.
add_contract_routes(app, MANIFEST)
# NOTE(review): CORS settings appear to be driven by environment variables
# with the REFLECTION_ANALYSER prefix — confirm against lens_contract.
add_cors(app, env_prefix="REFLECTION_ANALYSER")
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
@app.get("/")
async def root() -> dict[str, Any]:
    """Describe the running service: name, version, status and endpoint paths."""
    endpoints = {"health": "/health", "manifest": "/manifest", "analyse": "/analyse"}
    return dict(
        service="reflection-analyser",
        version=MANIFEST["version"],
        status="running",
        endpoints=endpoints,
    )
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
@app.post("/analyse", response_model=ReflectionAnalysis)
async def analyse(
    file: UploadFile | None = File(None, description="Reflection file (.txt/.md or .pdf/.docx with [documents])"),
    text: str | None = Form(None, description="Raw reflection text — use instead of file upload"),
) -> ReflectionAnalysis:
    """Analyse a reflection supplied either as an uploaded file or as raw text.

    Exactly one of ``file`` and ``text`` must be provided.

    Raises:
        HTTPException: 422 when neither or both inputs are supplied, or the
            uploaded file is empty; 400 when the analyser rejects the input;
            500 for any other failure while analysing an uploaded file.
    """
    if file is None and not text:
        raise HTTPException(status_code=422, detail="Supply either a 'file' upload or a 'text' form field")
    if file is not None and text:
        raise HTTPException(status_code=422, detail="Supply only one of 'file' or 'text', not both")

    if text:
        try:
            return _lens.analyse_text(text, source_kind="text")
        except ReflectionAnalyserError as e:
            # Chain the cause so the original error stays visible in server logs.
            raise HTTPException(status_code=400, detail=str(e)) from e

    # file branch
    content = await file.read()  # type: ignore[union-attr]
    if not content:
        raise HTTPException(status_code=422, detail="Empty file")
    # The upload is written to a temporary path because the analyser picks
    # its reader from the file suffix.
    with upload_tempfile(content, file.filename) as tmp_path:  # type: ignore[union-attr]
        try:
            return _lens.analyse(tmp_path)
        except ReflectionAnalyserError as e:
            raise HTTPException(status_code=400, detail=str(e)) from e
        except Exception as e:
            # Boundary catch-all: report unexpected failures as a 500.
            raise HTTPException(status_code=500, detail=str(e)) from e
|
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
"""CLI entry point for reflection-analyser."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
import argparse
|
|
5
|
+
import json
|
|
6
|
+
import sys
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def main() -> None:
    """Console-script entry point.

    The shared contract subcommands get the first look at the command line;
    when that helper reports it handled the invocation, nothing else runs.
    Otherwise the arguments are parsed as ``FILE [--json]`` and passed to
    ``_run``.
    """
    # Imported inside the function rather than at module import time.
    from lens_contract import run_contract_subcommands

    from .manifest import MANIFEST

    handled = run_contract_subcommands(
        MANIFEST,
        app_path="reflection_analyser.api:app",
        default_port=8015,
        env_prefix="REFLECTION_ANALYSER",
    )
    if handled:
        return

    cli = argparse.ArgumentParser(
        prog="reflection-analyser",
        description="Reflective-writing analysis — metacognition, criticality, depth (Moon-style bands)",
        epilog="subcommands: `serve` (HTTP API on port 8015), `manifest` (capability manifest)",
    )
    cli.add_argument("file", help="File path; or '-' to read text from stdin")
    cli.add_argument("--json", action="store_true", dest="as_json", help="Output raw JSON")

    _run(cli.parse_args())
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def _run(args) -> None:
    """Analyse the input named by ``args.file`` and print the result.

    ``"-"`` means read the text from stdin; anything else is treated as a
    file path. Analyser errors are written to stderr (as a JSON object when
    ``args.as_json`` is set) and the process exits with status 1. On success
    the result is printed as indented JSON or as a human-readable summary.
    """
    from .analyser import ReflectionAnalyser
    from .exceptions import ReflectionAnalyserError

    try:
        analyser = ReflectionAnalyser()
        if args.file != "-":
            result = analyser.analyse(Path(args.file))
        else:
            result = analyser.analyse_text(sys.stdin.read(), source_kind="stdin")
    except ReflectionAnalyserError as err:
        message = json.dumps({"error": str(err)}) if args.as_json else f"Error: {err}"
        print(message, file=sys.stderr)
        sys.exit(1)

    if not args.as_json:
        _print_summary(result)
        return

    print(result.model_dump_json(indent=2))
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def _print_summary(result) -> None:
|
|
60
|
+
print(f"Words: {result.word_count:,} Sentences: {result.sentence_count}")
|
|
61
|
+
print(f"Depth band: {result.depth_band} (composite score: {result.composite_depth_score:.2f})")
|
|
62
|
+
print()
|
|
63
|
+
print("Markers (count · per-100-words):")
|
|
64
|
+
for name, sig in (
|
|
65
|
+
("metacognition", result.metacognition),
|
|
66
|
+
("criticality", result.criticality),
|
|
67
|
+
("evidence", result.evidence),
|
|
68
|
+
("affect", result.affect),
|
|
69
|
+
("forward-looking", result.forward_looking),
|
|
70
|
+
):
|
|
71
|
+
print(f" {name:<16} {sig.count:>3} · {sig.coverage_per_100_words:.2f}/100w")
|
|
72
|
+
# Show one example sentence for the strongest marker.
|
|
73
|
+
strongest = max(
|
|
74
|
+
[("metacognition", result.metacognition), ("criticality", result.criticality),
|
|
75
|
+
("evidence", result.evidence), ("affect", result.affect),
|
|
76
|
+
("forward-looking", result.forward_looking)],
|
|
77
|
+
key=lambda kv: kv[1].count,
|
|
78
|
+
)
|
|
79
|
+
if strongest[1].examples:
|
|
80
|
+
print()
|
|
81
|
+
print(f"Strongest signal: {strongest[0]}")
|
|
82
|
+
for ex in strongest[1].examples[:2]:
|
|
83
|
+
short = ex[:160] + ("…" if len(ex) > 160 else "")
|
|
84
|
+
print(f" · {short}")
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
# Run main() when this module is executed as a script rather than imported.
if __name__ == "__main__":
    main()
|
|
@@ -0,0 +1,165 @@
|
|
|
1
|
+
"""Marker lexicons + matcher.
|
|
2
|
+
|
|
3
|
+
Conservative phrase-level lexicons covering the five marker families. Each
|
|
4
|
+
entry is a regex (compiled lazily on first use) so word-boundary handling
|
|
5
|
+
stays consistent. Keep entries phrase-level: matching `realised` as a bare
|
|
6
|
+
word over-fires (`he realised it was wrong` is a narrative phrase, not
|
|
7
|
+
reflection); `I realised` is the reflective form.
|
|
8
|
+
|
|
9
|
+
Adjust the lexicons — not the analyser logic — to tune the signal.
|
|
10
|
+
"""
|
|
11
|
+
from __future__ import annotations
|
|
12
|
+
|
|
13
|
+
import re
|
|
14
|
+
from dataclasses import dataclass
|
|
15
|
+
|
|
16
|
+
# Every pattern is compiled with re.IGNORECASE. The lexicons below are regex
# *fragments*: the compile step adds word boundaries around a fragment where
# it starts or ends with a word character.


# Metacognition: first-person cognitive verbs and explicit look-back phrases.
_METACOGNITION_PHRASES = [
    r"I (?:realised|realized|came to (?:see|realise|realize|understand)|recognised|recognized)",
    r"(?:looking|reflecting) back",
    r"on reflection",
    r"I (?:noticed|noted)",
    r"I (?:learnt|learned) (?:that|how)",
    r"I (?:think|thought|believe|believed)",
    r"in hindsight",
    r"I came to (?:think|believe)",
    r"I (?:understand|understood) (?:now|that)",
]


# Criticality: contrast / qualification connectives, plus explicit statements
# that something contradicts or offers another perspective.
_CRITICALITY_PHRASES = [
    r"however",
    r"on the other hand",
    r"in contrast",
    r"that said",
    r"although",
    r"despite",
    r"nevertheless",
    r"nonetheless",
    r"alternatively",
    r"by contrast",
    r"this (?:contradicts|conflicts with|challenges)",
    # Lazy, bounded gap (≤ 80 chars) between the first view and "another".
    r"one (?:view|perspective) is .{0,80}? another",
]


# Evidence: explicit reference to a specific source / quote / date / number.
# Note: we deliberately don't include bare 4-digit years — "I started uni in 2018"
# is a temporal reference, not evidence. The APA pattern below catches the
# evidence-shaped use ("(Smith, 2020)").
_EVIDENCE_PHRASES = [
    r"according to \w+",
    # The group here is capturing, which is harmless: the matcher only uses
    # each match's start position and the number of matches.
    r"as (\w+\s){0,3}(?:argues|argued|writes|wrote|notes|notes that|points out)",
    # NOTE(review): because of re.IGNORECASE, [A-Z] also accepts a lowercase
    # initial, so "(smith, 2020)" matches too.
    r"\([A-Z]\w+,? \d{4}\)",  # in-text APA
    r"\[[0-9]+\]",  # numeric citation
    r"\"[^\"]{8,}\"",  # a quoted span ≥ 8 chars
    r"\bp\.\s?\d+\b",  # page reference
]


# Affect: feelings + first-person. Wrapped with "I (was|felt) X" to avoid
# scoring narratives like "the customer was frustrated".
_AFFECT_PHRASES = [
    r"I (?:was|felt|feel|am|have been) (?:frustrated|surprised|confused|uncertain|confident|nervous|anxious|excited|proud|disappointed|overwhelmed|relieved|grateful)",
    r"I (?:struggled|enjoyed|hated|loved|disliked|appreciated)",
    r"(?:it|this) was (?:frustrating|surprising|exciting|disappointing|overwhelming|rewarding)",
    r"a sense of (?:relief|achievement|frustration|confusion|pride)",
]


# Forward-looking action / intent.
_FORWARD_PHRASES = [
    r"next time",
    r"going forward",
    r"in (?:the )?future",
    r"I will",
    r"I plan to",
    r"I intend to",
    r"I (?:hope|want) to",
    r"my next step",
    r"from now on",
]
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
@dataclass
class CompiledLexicon:
    """A named marker family together with its compiled regex patterns."""

    name: str  # marker family label, e.g. "evidence"
    patterns: list[re.Pattern]  # each pattern is scanned independently over the text

    def find_hits(self, text: str, sentences: list[str], *, sample_cap: int = 5) -> tuple[int, list[str]]:
        """Count hits across the text; return (count, sample sentences with hits).

        Args:
            text: Full text scanned with every pattern.
            sentences: ``text`` split into sentences, in document order. Entries
                may keep or drop their separators and surrounding whitespace.
            sample_cap: Maximum number of distinct example sentences returned.

        Returns:
            ``(count, sample)``: ``count`` is the total number of regex matches
            over all patterns; ``sample`` holds up to ``sample_cap`` distinct,
            stripped sentences in which a match starts, in discovery order.
        """
        count = 0
        sample: list[str] = []
        sample_seen: set[str] = set()
        ends = None  # sentence end offsets; built lazily on the first hit
        for p in self.patterns:
            for m in p.finditer(text):
                count += 1
                if len(sample) >= sample_cap:
                    # Sample is full: keep counting, skip the sentence lookup.
                    continue
                if ends is None:
                    ends = self._sentence_ends(text, sentences)
                pos = m.start()
                # First sentence whose (exclusive) end lies beyond the hit start.
                hit_sentence = next((s for end, s in ends if end > pos), "")
                if hit_sentence and hit_sentence not in sample_seen:
                    sample.append(hit_sentence)
                    sample_seen.add(hit_sentence)
        return count, sample

    @staticmethod
    def _sentence_ends(text: str, sentences: list[str]) -> list[tuple[int, str]]:
        """Return ``(end_offset, stripped_sentence)`` for each sentence, in order.

        Each sentence is located in ``text`` starting from the end of the
        previous one, so offsets stay correct even when the splitter dropped
        separators. A sentence that cannot be found verbatim is assumed to
        start where the previous one ended.
        """
        ends: list[tuple[int, str]] = []
        cursor = 0
        for s in sentences:
            start = text.find(s, cursor)
            if start < 0:
                start = cursor
            cursor = start + len(s)
            ends.append((cursor, s.strip()))
        return ends
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
def _compile(name: str, phrases: list[str]) -> CompiledLexicon:
    r"""Compile each phrase into a case-insensitive pattern for one marker family.

    A phrase is wrapped with word boundaries *only* where it starts/ends with a
    word character. Patterns that already use punctuation anchors
    (`(Author, 2020)`, `"quoted span"`, `[42]`) need no boundary, because `\b`
    next to non-word chars over-restricts (fails the inner-quote case).

    Args:
        name: Marker family name stored on the returned lexicon.
        phrases: Regex source strings, one per marker phrase.

    Returns:
        A CompiledLexicon holding one compiled pattern per phrase, in order.
    """
    compiled: list[re.Pattern] = []
    for phrase in phrases:
        prefix = r"\b" if _first_real_char_is_word(phrase) else ""
        suffix = r"\b" if _last_real_char_is_word(phrase) else ""
        # Group the phrase so the boundaries apply to the whole phrase: with a
        # bare top-level `a|b`, the prefix would bind to `a` and the suffix to `b`.
        compiled.append(re.compile(f"{prefix}(?:{phrase}){suffix}", re.IGNORECASE))
    return CompiledLexicon(name=name, patterns=compiled)
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
_WORD_RE = re.compile(r"\w")

# Escape letters that denote a word-character class (`\w`, `\d`).
_WORD_CLASS_ESCAPES = frozenset("wd")


def _first_real_char_is_word(phrase: str) -> bool:
    r"""Report whether the phrase's first matched character is a word character.

    Leading group syntax (`(`, `?`, `:`) and a leading `\b` anchor are skipped.
    A backslash escape is judged by what it matches: `\w` and `\d` count as word
    characters, while escaped literals (`\(`, `\[`, `\"`) and other classes
    (`\s`, `\W`, ...) do not. An empty phrase returns False.
    """
    i = 0
    n = len(phrase)
    while i < n:
        c = phrase[i]
        if c in "(?:":
            i += 1
            continue
        if c == "\\":
            escaped = phrase[i + 1 : i + 2]
            if escaped == "b":
                # Zero-width anchor: look at what follows it.
                i += 2
                continue
            return escaped in _WORD_CLASS_ESCAPES
        return bool(_WORD_RE.match(c))
    return False


def _last_real_char_is_word(phrase: str) -> bool:
    r"""Report whether the phrase's last matched character is a word character.

    Trailing group syntax (`)`, `?`, `:`) and a trailing `\b` anchor are skipped.
    A character preceded by an odd number of backslashes is an escape and is
    judged by what it matches: `\w` and `\d` count as word characters, escaped
    literals and other classes do not. An empty phrase returns False.
    """
    i = len(phrase) - 1
    while i >= 0:
        c = phrase[i]
        # Count the backslashes immediately before c to tell whether c is escaped.
        j = i
        while j > 0 and phrase[j - 1] == "\\":
            j -= 1
        if (i - j) % 2 == 1:
            if c == "b":
                # Zero-width anchor: step over `\b` and keep looking.
                i -= 2
                continue
            return c in _WORD_CLASS_ESCAPES
        if c in ")?:":
            i -= 1
            continue
        return bool(_WORD_RE.match(c))
    return False
|
|
149
|
+
|
|
150
|
+
|
|
151
|
+
# Module-level cache: stays None until the first call to lexicons().
_LEXICONS: dict[str, CompiledLexicon] | None = None


def lexicons() -> dict[str, CompiledLexicon]:
    """Return the compiled lexicons keyed by family name, building them once."""
    global _LEXICONS
    if _LEXICONS is not None:
        return _LEXICONS
    families = (
        ("metacognition", _METACOGNITION_PHRASES),
        ("criticality", _CRITICALITY_PHRASES),
        ("evidence", _EVIDENCE_PHRASES),
        ("affect", _AFFECT_PHRASES),
        ("forward", _FORWARD_PHRASES),
    )
    _LEXICONS = {family: _compile(family, phrases) for family, phrases in families}
    return _LEXICONS
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
"""Capability manifest for the lens family (consumed by auto-analyser)."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
from lens_contract import make_manifest
|
|
5
|
+
|
|
6
|
+
# Explicit-only — same pattern as conversation-analyser. Prose extensions already
# auto-route to document-analyser; reflection is a different interpretation of
# the same words (depth/metacognition rather than readability).
# NOTE(review): the HTTP tests read MANIFEST["version"], which is not passed
# here — presumably make_manifest fills it in; confirm against lens_contract.
MANIFEST = make_manifest(
    name="reflection-analyser",
    accepts=["reflection", "journal", "metacognition"],
    extensions=[],  # explicit-only — invoke deliberately
    auto_routable=False,
    produces="ReflectionAnalysis",
)
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
"""Pydantic schemas for reflection-analyser output."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
from pydantic import BaseModel, Field
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class MarkerSignal(BaseModel):
    """Counts + sample for one reflection-marker family."""

    # Raw number of pattern matches for the family.
    count: int = 0
    # Density figure, normalised by text length.
    coverage_per_100_words: float = Field(
        default=0.0,
        description="Hits per 100 words — a length-normalised proxy for marker density.",
    )
    # Capped list of sentences that triggered the marker.
    examples: list[str] = Field(
        default_factory=list,
        description="First few sentences where the marker fired (capped, for transparency).",
    )
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class ReflectionAnalysis(BaseModel):
    """Top-level result returned by ReflectionAnalyser.analyse* methods."""

    # --- basic size metrics ---
    word_count: int = 0
    sentence_count: int = 0

    # --- one signal per marker family (all required) ---
    metacognition: MarkerSignal
    criticality: MarkerSignal
    evidence: MarkerSignal
    affect: MarkerSignal
    forward_looking: MarkerSignal

    # --- composite score and its band label ---
    composite_depth_score: float = Field(
        default=0.0,
        ge=0.0,
        le=1.0,
        description="0–1; weighted blend of per-marker coverages (see analyser._compute_depth).",
    )
    depth_band: str = Field(
        default="descriptive",
        description="descriptive | dialogic | critical | transformative",
    )

    # --- where the analysed text came from ---
    source_kind: str = Field(
        default="text",
        description="'text' | 'file:txt' | 'file:md' | 'file:pdf' | 'file:docx' | …",
    )
|
|
File without changes
|
|
@@ -0,0 +1,130 @@
|
|
|
1
|
+
"""End-to-end tests for ReflectionAnalyser composite scoring and depth bands."""
|
|
2
|
+
from pathlib import Path
|
|
3
|
+
|
|
4
|
+
import pytest
|
|
5
|
+
|
|
6
|
+
from reflection_analyser import (
|
|
7
|
+
ReflectionAnalyser,
|
|
8
|
+
ReflectionAnalyserError,
|
|
9
|
+
ReflectionAnalysis,
|
|
10
|
+
)
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
# ── synthetic samples spanning the four depth bands ──────────────────────
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
# Plain narration of events; TestBands expects the "descriptive" band.
DESCRIPTIVE_SAMPLE = (
    "Today I went to the workshop. The instructor explained the procedure. "
    "Then we tried the exercise. After that we had lunch. We continued in the afternoon."
)

# Adds 'I realised', 'Looking back' and 'However' to the narration.
DIALOGIC_SAMPLE = (
    "Today I went to the workshop. I realised that I had been doing it wrong. "
    "Looking back, the instructor's point about variables made sense. "
    "However, I'm still not sure about the loop syntax."
)

# Adds an attributed source ("According to Smith") and first-person affect
# ("I felt frustrated") on top of the reflective and contrastive phrases.
CRITICAL_SAMPLE = (
    "I realised the approach I had been using was flawed. According to Smith (2020), "
    "this pattern is common. Looking back, I felt frustrated. "
    "However, on the other hand, the alternative was risky. "
    "Although the instructor's example seemed simple, in practice it was harder. "
    "On reflection I understand now why."
)

# As above, plus forward-looking intent: "I will", "Next time", "Going forward",
# "I plan to".
TRANSFORMATIVE_SAMPLE = (
    "I realised that the approach I had been using was flawed. "
    "According to Smith (2020), the recommended pattern is different. "
    "Looking back, I felt frustrated by my earlier choice. "
    "However, in contrast to my initial view, the evidence is clear. "
    "Although there are trade-offs, on the other hand the long-term benefit is real. "
    "I will revise my approach. Next time I will plan more carefully. "
    "Going forward, I plan to apply this lesson to the next assignment."
)
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
class TestBands:
    """Each synthetic sample should land in (at least) its intended depth band."""

    def test_descriptive(self):
        analyser = ReflectionAnalyser()
        result = analyser.analyse_text(DESCRIPTIVE_SAMPLE)
        assert isinstance(result, ReflectionAnalysis)
        assert result.depth_band == "descriptive"
        assert result.composite_depth_score < 0.25

    def test_dialogic(self):
        analyser = ReflectionAnalyser()
        result = analyser.analyse_text(DIALOGIC_SAMPLE)
        # 'I realised', 'looking back' and 'however' are present, so the band
        # must be dialogic or one step above.
        assert result.depth_band in ("dialogic", "critical")
        assert result.composite_depth_score >= 0.25

    def test_critical_or_better(self):
        analyser = ReflectionAnalyser()
        result = analyser.analyse_text(CRITICAL_SAMPLE)
        assert result.depth_band in ("critical", "transformative")
        assert result.composite_depth_score >= 0.40

    def test_transformative(self):
        analyser = ReflectionAnalyser()
        result = analyser.analyse_text(TRANSFORMATIVE_SAMPLE)
        assert result.depth_band in ("critical", "transformative")
        assert result.composite_depth_score >= 0.45
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
class TestSignals:
    """Per-marker counts, captured examples and basic size metrics."""

    def test_metacognition_count_matches(self):
        analysis = ReflectionAnalyser().analyse_text(DIALOGIC_SAMPLE)
        # At least the two hits from "I realised" and "Looking back".
        metacognition_hits = analysis.metacognition.count
        assert metacognition_hits >= 2

    def test_examples_captured(self):
        analysis = ReflectionAnalyser().analyse_text(DIALOGIC_SAMPLE)
        captured = analysis.metacognition.examples
        assert len(captured) >= 1

    def test_word_count(self):
        analysis = ReflectionAnalyser().analyse_text("Hello world.")
        assert analysis.word_count == 2
        assert analysis.sentence_count == 1
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
class TestErrors:
    """Invalid input must surface as ReflectionAnalyserError with a clear message."""

    def test_empty_raises(self):
        # A zero-length string is rejected.
        with pytest.raises(ReflectionAnalyserError, match="Empty"):
            ReflectionAnalyser().analyse_text("")

    def test_whitespace_raises(self):
        # Whitespace-only input counts as empty too.
        blank = " \n\n "
        with pytest.raises(ReflectionAnalyserError, match="Empty"):
            ReflectionAnalyser().analyse_text(blank)

    def test_missing_file_raises(self, tmp_path: Path):
        missing = tmp_path / "nope.md"
        with pytest.raises(ReflectionAnalyserError, match="not found"):
            ReflectionAnalyser().analyse(missing)
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
class TestFileInputs:
    """File-based entry point: source_kind tagging and the optional documents extra."""

    def test_md_file(self, tmp_path: Path):
        journal = tmp_path / "journal.md"
        journal.write_text(DIALOGIC_SAMPLE)
        analysis = ReflectionAnalyser().analyse(journal)
        assert analysis.source_kind == "file:md"
        assert analysis.metacognition.count >= 2

    def test_txt_file(self, tmp_path: Path):
        journal = tmp_path / "journal.txt"
        journal.write_text(CRITICAL_SAMPLE)
        analysis = ReflectionAnalyser().analyse(journal)
        assert analysis.source_kind == "file:txt"

    def test_binary_without_extra_raises_helpful_error(self, tmp_path: Path):
        # A .pdf path without document-analyser installed should error explicitly.
        pdf = tmp_path / "x.pdf"
        pdf.write_bytes(b"%PDF-1.4\n%%EOF\n")
        try:
            import document_analyser  # noqa: F401
        except ImportError:
            has_documents_extra = False
        else:
            has_documents_extra = True

        if not has_documents_extra:
            with pytest.raises(ReflectionAnalyserError, match="documents.*extra"):
                ReflectionAnalyser().analyse(pdf)
            return

        # document-analyser IS installed (sibling editable): the extra is in
        # effect, so extraction either succeeds or fails with a doc-extraction
        # reason. A failure on this bogus PDF is acceptable here.
        try:
            ReflectionAnalyser().analyse(pdf)
        except ReflectionAnalyserError:
            pass
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
"""HTTP smoke tests — the family contract surface."""
|
|
2
|
+
from fastapi.testclient import TestClient
|
|
3
|
+
|
|
4
|
+
from reflection_analyser.api import app
|
|
5
|
+
from reflection_analyser.manifest import MANIFEST
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
# Module-level test client shared by every test in this file.
client = TestClient(app)
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def test_health():
    """GET /health reports status ok and the manifest's version."""
    resp = client.get("/health")
    assert resp.status_code == 200
    payload = resp.json()
    assert payload["status"] == "ok"
    assert payload["version"] == MANIFEST["version"]
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def test_manifest():
    """GET /manifest exposes the explicit-only capability manifest."""
    resp = client.get("/manifest")
    assert resp.status_code == 200
    manifest = resp.json()
    assert manifest["name"] == "reflection-analyser"
    assert manifest["auto_routable"] is False
    assert manifest["extensions"] == []
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def test_analyse_text_form_field():
    """POST /analyse accepts raw text through the `text` form field."""
    sample = "I realised today, looking back, however that I will plan better next time."
    resp = client.post("/analyse", data={"text": sample})
    assert resp.status_code == 200, resp.text
    payload = resp.json()
    assert payload["word_count"] > 0
    known_bands = {"descriptive", "dialogic", "critical", "transformative"}
    assert payload["depth_band"] in known_bands
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def test_analyse_file_upload():
    """POST /analyse accepts a markdown upload through the `file` field."""
    text = "I realised something. Looking back, however, I noticed it. I will change."
    upload = ("journal.md", text.encode(), "text/markdown")
    resp = client.post("/analyse", files={"file": upload})
    assert resp.status_code == 200, resp.text
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def test_analyse_no_input_returns_422():
    """POST /analyse with neither text nor file is rejected with 422."""
    resp = client.post("/analyse")
    assert resp.status_code == 422
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def test_analyse_both_inputs_returns_422():
    """POST /analyse with both text and file is rejected with 422."""
    form = {"text": "x"}
    upload = {"file": ("a.md", b"y", "text/markdown")}
    resp = client.post("/analyse", data=form, files=upload)
    assert resp.status_code == 422
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
"""CLI smoke tests."""
|
|
2
|
+
import json
|
|
3
|
+
import subprocess
|
|
4
|
+
import sys
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def _run(*args, stdin: str | None = None, timeout: float = 60.0) -> subprocess.CompletedProcess:
    """Run the CLI module in a subprocess and capture its output as text.

    Args:
        *args: Command-line arguments; each is converted with ``str``.
        stdin: Text piped to the process's standard input, if any.
        timeout: Seconds to wait before ``subprocess.TimeoutExpired`` is raised,
            so a CLI blocked on input fails the test instead of hanging the run.

    Returns:
        The completed process with ``stdout``/``stderr`` captured as strings.
        The return code is not checked here; callers assert on it.
    """
    return subprocess.run(
        [sys.executable, "-m", "reflection_analyser.cli", *map(str, args)],
        capture_output=True,
        text=True,
        input=stdin,
        timeout=timeout,
    )
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def test_file(tmp_path: Path):
    """A file path argument produces the human-readable report."""
    journal = tmp_path / "journal.md"
    journal.write_text("I realised today, looking back, however that I will plan better next time.")
    proc = _run(journal)
    assert proc.returncode == 0, proc.stderr
    report = proc.stdout
    assert "Depth band:" in report
    assert "Markers" in report
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def test_json(tmp_path: Path):
    """--json switches the output to a JSON document containing depth_band."""
    journal = tmp_path / "journal.md"
    journal.write_text("I realised something significant.")
    proc = _run(journal, "--json")
    assert proc.returncode == 0, proc.stderr
    payload = json.loads(proc.stdout)
    assert "depth_band" in payload
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def test_stdin():
    """The `-` argument makes the CLI read the reflection from standard input."""
    proc = _run("-", stdin="I realised, looking back, that I will change.")
    assert proc.returncode == 0, proc.stderr
    assert "Depth band:" in proc.stdout
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def test_manifest_subcommand():
    """`manifest` prints the capability manifest as JSON."""
    proc = _run("manifest")
    assert proc.returncode == 0, proc.stderr
    manifest = json.loads(proc.stdout)
    assert manifest["name"] == "reflection-analyser"
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
"""Unit tests for the marker lexicons."""
|
|
2
|
+
from reflection_analyser.lexicon import lexicons
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
def _hits(name: str, text: str) -> int:
    """Return only the hit count of marker family `name` over `text`."""
    # Naive split on ". ", dropping empty pieces; good enough for one-line fixtures.
    sentences = list(filter(None, text.split(". ")))
    lexicon = lexicons()[name]
    return lexicon.find_hits(text, sentences)[0]
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class TestMetacognition:
    """First-person insight phrases fire; third-person narration does not."""

    def test_first_person_realised(self):
        hits = _hits("metacognition", "I realised that I had been wrong.")
        assert hits == 1

    def test_looking_back(self):
        hits = _hits("metacognition", "Looking back, the choice was clear.")
        assert hits == 1

    def test_he_realised_does_not_fire(self):
        # Narrative about someone else is not reflection, so no hit is allowed.
        hits = _hits("metacognition", "He realised it was Tuesday.")
        assert hits == 0

    def test_phrases_with_us_and_uk_spelling(self):
        # Both `realised` and `realized` are in the lexicon.
        hits = _hits("metacognition", "I realized something new.")
        assert hits == 1
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class TestCriticality:
    """Contrastive connectives fire; plain narration does not."""

    def test_however(self):
        hits = _hits("criticality", "It was hard. However, I learned a lot.")
        assert hits == 1

    def test_on_the_other_hand(self):
        hits = _hits("criticality", "On the other hand, the data was clear.")
        assert hits == 1

    def test_no_false_positive(self):
        hits = _hits("criticality", "Today I went to class.")
        assert hits == 0
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
class TestEvidence:
    """Citation-shaped spans: APA in-text, quoted text, page references."""

    def test_apa_in_text(self):
        hits = _hits("evidence", "As noted (Smith, 2020), the trend is clear.")
        assert hits == 1

    def test_quoted_span(self):
        hits = _hits("evidence", 'The author wrote "this is a quotation".')
        assert hits == 1

    def test_page_reference(self):
        hits = _hits("evidence", "See p. 42 for details.")
        assert hits == 1
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
class TestAffect:
    """Only first-person feelings count as affect."""

    def test_i_felt_frustrated(self):
        hits = _hits("affect", "I felt frustrated by the result.")
        assert hits == 1

    def test_third_person_frustrated_does_not_fire(self):
        # Someone else's frustration is narrative, not the writer's affect.
        hits = _hits("affect", "The customer was frustrated.")
        assert hits == 0
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
class TestForward:
    """Forward-looking intent phrases."""

    def test_next_time(self):
        # Both 'next time' and 'I will' can fire, hence the lower bound.
        hits = _hits("forward", "Next time I will plan more carefully.")
        assert hits >= 1

    def test_going_forward(self):
        hits = _hits("forward", "Going forward, I'll keep notes.")
        assert hits == 1
|