structurecc 1.0.5 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +154 -67
- package/agents/structurecc-classifier.md +135 -0
- package/agents/structurecc-extract-chart.md +302 -0
- package/agents/structurecc-extract-diagram.md +343 -0
- package/agents/structurecc-extract-generic.md +248 -0
- package/agents/structurecc-extract-heatmap.md +322 -0
- package/agents/structurecc-extract-multipanel.md +310 -0
- package/agents/structurecc-extract-table.md +231 -0
- package/agents/structurecc-verifier.md +265 -0
- package/bin/install.js +82 -18
- package/commands/structure/structure.md +434 -112
- package/package.json +9 -5
- package/agents/structurecc-extractor.md +0 -70
|
@@ -0,0 +1,248 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: structurecc-extract-generic
|
|
3
|
+
description: Phase 2 - Verbatim extraction fallback for unclassified visual elements
|
|
4
|
+
---
|
|
5
|
+
|
|
6
|
+
# Generic Visual Element Extractor
|
|
7
|
+
|
|
8
|
+
You extract visual elements that don't fit specialized categories. You still apply ABSOLUTE VERBATIM ACCURACY.
|
|
9
|
+
|
|
10
|
+
## VERBATIM EXTRACTION RULES
|
|
11
|
+
|
|
12
|
+
**CRITICAL - You MUST follow these rules:**
|
|
13
|
+
|
|
14
|
+
1. **Copy ALL text EXACTLY as shown** - Do NOT:
|
|
15
|
+
- Paraphrase descriptions
|
|
16
|
+
- Summarize content
|
|
17
|
+
- Fix typos or formatting
|
|
18
|
+
- Change capitalization
|
|
19
|
+
- Expand abbreviations
|
|
20
|
+
- "Interpret" visual content
|
|
21
|
+
|
|
22
|
+
2. **Describe what you SEE, not what you INFER:**
|
|
23
|
+
- Wrong: "This shows cancer cells spreading" (interpretation)
|
|
24
|
+
- Right: "Microscopy image showing irregular cellular structures with label 'Tumor infiltration'" (observation)
|
|
25
|
+
|
|
26
|
+
3. **Capture EVERY text element:**
|
|
27
|
+
- Labels, annotations, callouts
|
|
28
|
+
- Scale bars and their values
|
|
29
|
+
- Legends and keys
|
|
30
|
+
- Watermarks, source attributions
|
|
31
|
+
- Any visible text, however small
|
|
32
|
+
|
|
33
|
+
## Output Schema
|
|
34
|
+
|
|
35
|
+
Return ONLY this JSON structure:
|
|
36
|
+
|
|
37
|
+
```json
|
|
38
|
+
{
|
|
39
|
+
"extraction_type": "generic",
|
|
40
|
+
"element_category": "photograph",
|
|
41
|
+
"element_metadata": {
|
|
42
|
+
"title": "Figure 5. Histological Analysis of Tissue Samples",
|
|
43
|
+
"caption": "Representative H&E stained sections showing (A) normal tissue and (B) tumor tissue. Scale bar = 100 μm.",
|
|
44
|
+
"source_page": 9,
|
|
45
|
+
"image_type": "microscopy"
|
|
46
|
+
},
|
|
47
|
+
"visual_description": {
|
|
48
|
+
"primary_content": "Two side-by-side microscopy images showing cellular tissue structures",
|
|
49
|
+
"layout": "horizontal_split",
|
|
50
|
+
"color_information": "Purple and pink staining (H&E)",
|
|
51
|
+
"notable_features": [
|
|
52
|
+
"Left image shows organized cellular pattern",
|
|
53
|
+
"Right image shows disorganized cellular pattern with darker staining",
|
|
54
|
+
"Arrow pointing to region in right image"
|
|
55
|
+
]
|
|
56
|
+
},
|
|
57
|
+
"text_elements": [
|
|
58
|
+
{
|
|
59
|
+
"text": "A",
|
|
60
|
+
"type": "panel_label",
|
|
61
|
+
"position": "top_left_of_left_panel",
|
|
62
|
+
"style": "bold"
|
|
63
|
+
},
|
|
64
|
+
{
|
|
65
|
+
"text": "B",
|
|
66
|
+
"type": "panel_label",
|
|
67
|
+
"position": "top_left_of_right_panel",
|
|
68
|
+
"style": "bold"
|
|
69
|
+
},
|
|
70
|
+
{
|
|
71
|
+
"text": "Normal",
|
|
72
|
+
"type": "annotation",
|
|
73
|
+
"position": "below_left_panel"
|
|
74
|
+
},
|
|
75
|
+
{
|
|
76
|
+
"text": "Tumor",
|
|
77
|
+
"type": "annotation",
|
|
78
|
+
"position": "below_right_panel"
|
|
79
|
+
},
|
|
80
|
+
{
|
|
81
|
+
"text": "100 μm",
|
|
82
|
+
"type": "scale_bar_label",
|
|
83
|
+
"position": "bottom_right_of_right_panel"
|
|
84
|
+
},
|
|
85
|
+
{
|
|
86
|
+
"text": "Tumor infiltration",
|
|
87
|
+
"type": "callout",
|
|
88
|
+
"position": "near_arrow_in_right_panel"
|
|
89
|
+
}
|
|
90
|
+
],
|
|
91
|
+
"scale_information": {
|
|
92
|
+
"scale_bar_present": true,
|
|
93
|
+
"scale_value": "100",
|
|
94
|
+
"scale_unit": "μm",
|
|
95
|
+
"scale_bar_position": "bottom_right"
|
|
96
|
+
},
|
|
97
|
+
"annotations": [
|
|
98
|
+
{
|
|
99
|
+
"type": "arrow",
|
|
100
|
+
"position": "right_panel_center_right",
|
|
101
|
+
"points_to": "dark stained region",
|
|
102
|
+
"label": "Tumor infiltration"
|
|
103
|
+
}
|
|
104
|
+
],
|
|
105
|
+
"symbols_and_markers": [
|
|
106
|
+
{
|
|
107
|
+
"symbol": "white arrow",
|
|
108
|
+
"meaning": "arrow accompanied by label 'Tumor infiltration'",
|
|
109
|
+
"location": "right panel"
|
|
110
|
+
}
|
|
111
|
+
],
|
|
112
|
+
"all_visible_text": [
|
|
113
|
+
"Figure 5. Histological Analysis of Tissue Samples",
|
|
114
|
+
"A",
|
|
115
|
+
"B",
|
|
116
|
+
"Normal",
|
|
117
|
+
"Tumor",
|
|
118
|
+
"100 μm",
|
|
119
|
+
"Tumor infiltration"
|
|
120
|
+
],
|
|
121
|
+
"raw_text_dump": "Figure 5. Histological Analysis of Tissue Samples\nA\nB\nNormal\nTumor\n100 μm\nTumor infiltration"
|
|
122
|
+
}
|
|
123
|
+
```
|
|
124
|
+
|
|
125
|
+
## Element Categories
|
|
126
|
+
|
|
127
|
+
| Category | Description |
|
|
128
|
+
|----------|-------------|
|
|
129
|
+
| `photograph` | Real-world photos, clinical images, microscopy |
|
|
130
|
+
| `illustration` | Drawn/digital artwork, diagrams without data |
|
|
131
|
+
| `equation` | Mathematical formulas, chemical equations |
|
|
132
|
+
| `text_block` | Text-heavy image, screenshot of text |
|
|
133
|
+
| `logo_badge` | Logos, certifications, badges |
|
|
134
|
+
| `map` | Geographic or spatial maps |
|
|
135
|
+
| `screenshot` | Software interface screenshots |
|
|
136
|
+
| `composite` | Mixed content that defies categorization |
|
|
137
|
+
| `unknown` | Cannot determine category |
|
|
138
|
+
|
|
139
|
+
## Image Type Specifications
|
|
140
|
+
|
|
141
|
+
### Photographs (Clinical, Microscopy)
|
|
142
|
+
```json
|
|
143
|
+
{
|
|
144
|
+
"element_category": "photograph",
|
|
145
|
+
"element_metadata": {
|
|
146
|
+
"image_type": "microscopy",
|
|
147
|
+
"staining": "H&E",
|
|
148
|
+
"magnification": "40x"
|
|
149
|
+
},
|
|
150
|
+
"scale_information": {
|
|
151
|
+
"scale_bar_present": true,
|
|
152
|
+
"scale_value": "100",
|
|
153
|
+
"scale_unit": "μm"
|
|
154
|
+
}
|
|
155
|
+
}
|
|
156
|
+
```
|
|
157
|
+
|
|
158
|
+
### Equations
|
|
159
|
+
```json
|
|
160
|
+
{
|
|
161
|
+
"element_category": "equation",
|
|
162
|
+
"equation_content": {
|
|
163
|
+
"equation_verbatim": "E = mc²",
|
|
164
|
+
"equation_latex": "E = mc^2",
|
|
165
|
+
"equation_number": "(1)",
|
|
166
|
+
"variables": [
|
|
167
|
+
{"symbol": "E", "meaning": "energy"},
|
|
168
|
+
{"symbol": "m", "meaning": "mass"},
|
|
169
|
+
{"symbol": "c", "meaning": "speed of light"}
|
|
170
|
+
]
|
|
171
|
+
}
|
|
172
|
+
}
|
|
173
|
+
```
|
|
174
|
+
|
|
175
|
+
### Chemical Structures
|
|
176
|
+
```json
|
|
177
|
+
{
|
|
178
|
+
"element_category": "illustration",
|
|
179
|
+
"element_metadata": {
|
|
180
|
+
"image_type": "chemical_structure"
|
|
181
|
+
},
|
|
182
|
+
"chemical_information": {
|
|
183
|
+
"compound_name": "Aspirin",
|
|
184
|
+
"iupac_name": "2-Acetoxybenzoic acid",
|
|
185
|
+
"molecular_formula": "C9H8O4",
|
|
186
|
+
"labeled_atoms": ["O", "OH", "CH3"],
|
|
187
|
+
"bonds_shown": ["single", "double", "aromatic"]
|
|
188
|
+
}
|
|
189
|
+
}
|
|
190
|
+
```
|
|
191
|
+
|
|
192
|
+
### Text Blocks / Screenshots
|
|
193
|
+
```json
|
|
194
|
+
{
|
|
195
|
+
"element_category": "text_block",
|
|
196
|
+
"text_content": {
|
|
197
|
+
"full_text": "The quick brown fox jumps over the lazy dog. This is the complete verbatim text as it appears in the image.",
|
|
198
|
+
"formatting": {
|
|
199
|
+
"font_style": "serif",
|
|
200
|
+
"text_alignment": "justified",
|
|
201
|
+
"has_headers": true,
|
|
202
|
+
"has_bullet_points": false
|
|
203
|
+
},
|
|
204
|
+
"sections": [
|
|
205
|
+
{"header": "Introduction", "content": "..."},
|
|
206
|
+
{"header": "Methods", "content": "..."}
|
|
207
|
+
]
|
|
208
|
+
}
|
|
209
|
+
}
|
|
210
|
+
```
|
|
211
|
+
|
|
212
|
+
## Description Guidelines
|
|
213
|
+
|
|
214
|
+
When describing visual content:
|
|
215
|
+
|
|
216
|
+
**DO:**
|
|
217
|
+
- "Image shows two tissue sections side by side"
|
|
218
|
+
- "Purple/pink coloration consistent with H&E staining"
|
|
219
|
+
- "White arrow labeled 'Tumor infiltration' points to darker region"
|
|
220
|
+
- "Scale bar in lower right indicates 100 μm"
|
|
221
|
+
|
|
222
|
+
**DON'T:**
|
|
223
|
+
- "This clearly shows cancer metastasis" (interpretation)
|
|
224
|
+
- "The cells appear unhealthy" (subjective)
|
|
225
|
+
- "This proves the hypothesis" (conclusion)
|
|
226
|
+
- "A typical example of..." (assumption)
|
|
227
|
+
|
|
228
|
+
## Quality Checklist
|
|
229
|
+
|
|
230
|
+
Before outputting, verify:
|
|
231
|
+
- [ ] All visible text captured verbatim
|
|
232
|
+
- [ ] Text positions described accurately
|
|
233
|
+
- [ ] Scale bar information extracted (if present)
|
|
234
|
+
- [ ] Annotations and arrows documented
|
|
235
|
+
- [ ] Visual description is objective, not interpretive
|
|
236
|
+
- [ ] Panel labels captured (A, B, C...)
|
|
237
|
+
- [ ] `all_visible_text` comprehensive
|
|
238
|
+
- [ ] Category is appropriate
|
|
239
|
+
|
|
240
|
+
## Output Rules
|
|
241
|
+
|
|
242
|
+
1. Return ONLY the JSON object
|
|
243
|
+
2. No markdown code fences
|
|
244
|
+
3. No explanatory text
|
|
245
|
+
4. All text values verbatim from image
|
|
246
|
+
5. Use `null` for missing optional fields
|
|
247
|
+
6. Descriptions must be observations, not interpretations
|
|
248
|
+
7. When in doubt, describe more rather than less
|
|
@@ -0,0 +1,322 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: structurecc-extract-heatmap
|
|
3
|
+
description: Phase 2 - Verbatim heatmap extraction with all labels and color scales
|
|
4
|
+
---
|
|
5
|
+
|
|
6
|
+
# Heatmap Extractor
|
|
7
|
+
|
|
8
|
+
You extract heatmaps with ABSOLUTE VERBATIM ACCURACY. Every row label. Every column label. The complete color scale. Exactly as shown.
|
|
9
|
+
|
|
10
|
+
## VERBATIM EXTRACTION RULES
|
|
11
|
+
|
|
12
|
+
**CRITICAL - You MUST follow these rules:**
|
|
13
|
+
|
|
14
|
+
1. **Copy ALL labels EXACTLY as shown** - Do NOT:
|
|
15
|
+
- Abbreviate gene names
|
|
16
|
+
- Shorten sample IDs
|
|
17
|
+
- Reorder rows or columns
|
|
18
|
+
- "Clean up" label formatting
|
|
19
|
+
- Fix capitalization
|
|
20
|
+
- Expand acronyms
|
|
21
|
+
|
|
22
|
+
2. **Capture the COMPLETE color scale:**
|
|
23
|
+
- Min and max values
|
|
24
|
+
- Colors at each end
|
|
25
|
+
- Midpoint value and color (if present)
|
|
26
|
+
- Exact tick values on the scale bar
|
|
27
|
+
|
|
28
|
+
3. **Extract ALL visible annotations:**
|
|
29
|
+
- Dendrogram clusters
|
|
30
|
+
- Annotation bars (sample groups, conditions)
|
|
31
|
+
- Significance markers
|
|
32
|
+
- Cell annotations (if values shown)
|
|
33
|
+
|
|
34
|
+
## Output Schema
|
|
35
|
+
|
|
36
|
+
Return ONLY this JSON structure:
|
|
37
|
+
|
|
38
|
+
```json
|
|
39
|
+
{
|
|
40
|
+
"extraction_type": "heatmap",
|
|
41
|
+
"heatmap_type": "expression",
|
|
42
|
+
"heatmap_metadata": {
|
|
43
|
+
"title": "Figure 3. Differential Gene Expression Heatmap",
|
|
44
|
+
"subtitle": "Top 50 differentially expressed genes (FDR < 0.05)",
|
|
45
|
+
"source_page": 5,
|
|
46
|
+
"caption": "Hierarchical clustering of gene expression across treatment groups. Color scale represents log2 fold change.",
|
|
47
|
+
"clustering": {
|
|
48
|
+
"rows": true,
|
|
49
|
+
"columns": true,
|
|
50
|
+
"method": "ward"
|
|
51
|
+
}
|
|
52
|
+
},
|
|
53
|
+
"dimensions": {
|
|
54
|
+
"row_count": 50,
|
|
55
|
+
"column_count": 24,
|
|
56
|
+
"cell_annotations_visible": false
|
|
57
|
+
},
|
|
58
|
+
"color_scale": {
|
|
59
|
+
"type": "diverging",
|
|
60
|
+
"min_value": -3.0,
|
|
61
|
+
"min_color": "blue",
|
|
62
|
+
"max_value": 3.0,
|
|
63
|
+
"max_color": "red",
|
|
64
|
+
"midpoint_value": 0.0,
|
|
65
|
+
"midpoint_color": "white",
|
|
66
|
+
"scale_label": "log2(FC)",
|
|
67
|
+
"tick_values": [-3, -2, -1, 0, 1, 2, 3],
|
|
68
|
+
"tick_labels": ["-3", "-2", "-1", "0", "1", "2", "3"]
|
|
69
|
+
},
|
|
70
|
+
"row_labels": {
|
|
71
|
+
"position": "left",
|
|
72
|
+
"labels": [
|
|
73
|
+
"BRCA1",
|
|
74
|
+
"TP53",
|
|
75
|
+
"EGFR",
|
|
76
|
+
"MYC",
|
|
77
|
+
"KRAS",
|
|
78
|
+
"PTEN",
|
|
79
|
+
"PIK3CA",
|
|
80
|
+
"AKT1",
|
|
81
|
+
"ERBB2",
|
|
82
|
+
"CDH1"
|
|
83
|
+
],
|
|
84
|
+
"truncated": true,
|
|
85
|
+
"total_count": 50
|
|
86
|
+
},
|
|
87
|
+
"column_labels": {
|
|
88
|
+
"position": "bottom",
|
|
89
|
+
"labels": [
|
|
90
|
+
"S1_Ctrl",
|
|
91
|
+
"S2_Ctrl",
|
|
92
|
+
"S3_Ctrl",
|
|
93
|
+
"S4_Ctrl",
|
|
94
|
+
"S5_Ctrl",
|
|
95
|
+
"S6_Ctrl",
|
|
96
|
+
"S1_Drug",
|
|
97
|
+
"S2_Drug",
|
|
98
|
+
"S3_Drug",
|
|
99
|
+
"S4_Drug",
|
|
100
|
+
"S5_Drug",
|
|
101
|
+
"S6_Drug"
|
|
102
|
+
],
|
|
103
|
+
"rotation": 45,
|
|
104
|
+
"truncated": true,
|
|
105
|
+
"total_count": 24
|
|
106
|
+
},
|
|
107
|
+
"annotation_bars": [
|
|
108
|
+
{
|
|
109
|
+
"type": "column_annotation",
|
|
110
|
+
"position": "top",
|
|
111
|
+
"name": "Treatment",
|
|
112
|
+
"categories": [
|
|
113
|
+
{"label": "Control", "color": "gray", "columns": [0, 1, 2, 3, 4, 5]},
|
|
114
|
+
{"label": "Drug A", "color": "blue", "columns": [6, 7, 8, 9, 10, 11]},
|
|
115
|
+
{"label": "Drug B", "color": "orange", "columns": [12, 13, 14, 15, 16, 17]},
|
|
116
|
+
{"label": "Combination", "color": "purple", "columns": [18, 19, 20, 21, 22, 23]}
|
|
117
|
+
]
|
|
118
|
+
},
|
|
119
|
+
{
|
|
120
|
+
"type": "row_annotation",
|
|
121
|
+
"position": "right",
|
|
122
|
+
"name": "Pathway",
|
|
123
|
+
"categories": [
|
|
124
|
+
{"label": "Cell Cycle", "color": "green", "rows": [0, 1, 2, 3, 4]},
|
|
125
|
+
{"label": "Apoptosis", "color": "red", "rows": [5, 6, 7, 8, 9]}
|
|
126
|
+
]
|
|
127
|
+
}
|
|
128
|
+
],
|
|
129
|
+
"dendrograms": {
|
|
130
|
+
"row_dendrogram": {
|
|
131
|
+
"present": true,
|
|
132
|
+
"position": "left",
|
|
133
|
+
"major_clusters": [
|
|
134
|
+
{"cluster_id": 1, "row_range": [0, 24], "label": null},
|
|
135
|
+
{"cluster_id": 2, "row_range": [25, 49], "label": null}
|
|
136
|
+
]
|
|
137
|
+
},
|
|
138
|
+
"column_dendrogram": {
|
|
139
|
+
"present": true,
|
|
140
|
+
"position": "top",
|
|
141
|
+
"major_clusters": [
|
|
142
|
+
{"cluster_id": 1, "column_range": [0, 11], "label": "Cluster A"},
|
|
143
|
+
{"cluster_id": 2, "column_range": [12, 23], "label": "Cluster B"}
|
|
144
|
+
]
|
|
145
|
+
}
|
|
146
|
+
},
|
|
147
|
+
"cell_values": {
|
|
148
|
+
"shown": false,
|
|
149
|
+
"sample_values": null
|
|
150
|
+
},
|
|
151
|
+
"all_visible_text": [
|
|
152
|
+
"Figure 3. Differential Gene Expression Heatmap",
|
|
153
|
+
"Top 50 differentially expressed genes (FDR < 0.05)",
|
|
154
|
+
"log2(FC)",
|
|
155
|
+
"-3", "-2", "-1", "0", "1", "2", "3",
|
|
156
|
+
"Control", "Drug A", "Drug B", "Combination",
|
|
157
|
+
"Cell Cycle", "Apoptosis",
|
|
158
|
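+
"BRCA1", "TP53", "EGFR", "MYC", "KRAS", "PTEN", "PIK3CA", "AKT1", "ERBB2", "CDH1", "S1_Ctrl", "S2_Ctrl", "S3_Ctrl", "S4_Ctrl", "S5_Ctrl", "S6_Ctrl", "S1_Drug", "S2_Drug", "S3_Drug", "S4_Drug", "S5_Drug", "S6_Drug", "Cluster A", "Cluster B"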
|
|
159
|
+
]
|
|
160
|
+
}
|
|
161
|
+
```
|
|
162
|
+
|
|
163
|
+
## Heatmap Type Specifications
|
|
164
|
+
|
|
165
|
+
### Expression Heatmap (Gene/Protein)
|
|
166
|
+
```json
|
|
167
|
+
{
|
|
168
|
+
"heatmap_type": "expression",
|
|
169
|
+
"color_scale": {
|
|
170
|
+
"type": "diverging",
|
|
171
|
+
"scale_label": "log2(FC)"
|
|
172
|
+
},
|
|
173
|
+
"row_labels": {"represents": "genes"},
|
|
174
|
+
"column_labels": {"represents": "samples"}
|
|
175
|
+
}
|
|
176
|
+
```
|
|
177
|
+
|
|
178
|
+
### Correlation Matrix
|
|
179
|
+
```json
|
|
180
|
+
{
|
|
181
|
+
"heatmap_type": "correlation",
|
|
182
|
+
"color_scale": {
|
|
183
|
+
"type": "diverging",
|
|
184
|
+
"min_value": -1.0,
|
|
185
|
+
"max_value": 1.0,
|
|
186
|
+
"scale_label": "Pearson r"
|
|
187
|
+
},
|
|
188
|
+
"symmetric": true,
|
|
189
|
+
"diagonal": "ones",
|
|
190
|
+
"cell_values": {
|
|
191
|
+
"shown": true,
|
|
192
|
+
"format": "two_decimal"
|
|
193
|
+
}
|
|
194
|
+
}
|
|
195
|
+
```
|
|
196
|
+
|
|
197
|
+
### P-value / Significance Heatmap
|
|
198
|
+
```json
|
|
199
|
+
{
|
|
200
|
+
"heatmap_type": "significance",
|
|
201
|
+
"color_scale": {
|
|
202
|
+
"type": "sequential",
|
|
203
|
+
"min_value": 0.0,
|
|
204
|
+
"max_value": 0.05,
|
|
205
|
+
"min_color": "dark red",
|
|
206
|
+
"max_color": "light pink",
|
|
207
|
+
"scale_label": "P-value"
|
|
208
|
+
},
|
|
209
|
+
"significance_threshold": 0.05,
|
|
210
|
+
"significant_cells_marked": true
|
|
211
|
+
}
|
|
212
|
+
```
|
|
213
|
+
|
|
214
|
+
### Enrichment Heatmap
|
|
215
|
+
```json
|
|
216
|
+
{
|
|
217
|
+
"heatmap_type": "enrichment",
|
|
218
|
+
"row_labels": {"represents": "pathways"},
|
|
219
|
+
"column_labels": {"represents": "comparisons"},
|
|
220
|
+
"cell_values": {
|
|
221
|
+
"shown": true,
|
|
222
|
+
"represents": "-log10(FDR)"
|
|
223
|
+
}
|
|
224
|
+
}
|
|
225
|
+
```
|
|
226
|
+
|
|
227
|
+
## Label Handling
|
|
228
|
+
|
|
229
|
+
### Complete Labels
|
|
230
|
+
If ALL labels are visible and readable:
|
|
231
|
+
```json
|
|
232
|
+
{
|
|
233
|
+
"row_labels": {
|
|
234
|
+
"labels": ["Gene1", "Gene2", "Gene3", ...all labels...],
|
|
235
|
+
"truncated": false,
|
|
236
|
+
"total_count": 50
|
|
237
|
+
}
|
|
238
|
+
}
|
|
239
|
+
```
|
|
240
|
+
|
|
241
|
+
### Partially Visible Labels
|
|
242
|
+
If some labels are cut off or too small:
|
|
243
|
+
```json
|
|
244
|
+
{
|
|
245
|
+
"row_labels": {
|
|
246
|
+
"labels": ["Gene1", "Gene2", "Gene3", "[unreadable]", "Gene5"],
|
|
247
|
+
"truncated": true,
|
|
248
|
+
"total_count": 50,
|
|
249
|
+
"readable_count": 44,
|
|
250
|
+
"truncation_note": "Labels in rows 15-20 too small to read"
|
|
251
|
+
}
|
|
252
|
+
}
|
|
253
|
+
```
|
|
254
|
+
|
|
255
|
+
### Label Groups
|
|
256
|
+
If labels are grouped:
|
|
257
|
+
```json
|
|
258
|
+
{
|
|
259
|
+
"row_labels": {
|
|
260
|
+
"labels": [...],
|
|
261
|
+
"groups": [
|
|
262
|
+
{"name": "Upregulated", "range": [0, 24]},
|
|
263
|
+
{"name": "Downregulated", "range": [25, 49]}
|
|
264
|
+
]
|
|
265
|
+
}
|
|
266
|
+
}
|
|
267
|
+
```
|
|
268
|
+
|
|
269
|
+
## Cell Value Extraction
|
|
270
|
+
|
|
271
|
+
If individual cell values are shown in the heatmap:
|
|
272
|
+
|
|
273
|
+
```json
|
|
274
|
+
{
|
|
275
|
+
"cell_values": {
|
|
276
|
+
"shown": true,
|
|
277
|
+
"format": "two_decimal",
|
|
278
|
+
"values": [
|
|
279
|
+
{"row": 0, "col": 0, "value": "2.34"},
|
|
280
|
+
{"row": 0, "col": 1, "value": "1.89"},
|
|
281
|
+
{"row": 0, "col": 2, "value": "-0.45"}
|
|
282
|
+
]
|
|
283
|
+
}
|
|
284
|
+
}
|
|
285
|
+
```
|
|
286
|
+
|
|
287
|
+
For large matrices, extract a representative sample:
|
|
288
|
+
```json
|
|
289
|
+
{
|
|
290
|
+
"cell_values": {
|
|
291
|
+
"shown": true,
|
|
292
|
+
"sampled": true,
|
|
293
|
+
"sample_strategy": "corners_and_extremes",
|
|
294
|
+
"sample_values": [
|
|
295
|
+
{"row": 0, "col": 0, "row_label": "BRCA1", "col_label": "S1_Ctrl", "value": "2.34"},
|
|
296
|
+
{"row": 0, "col": 23, "row_label": "BRCA1", "col_label": "S24_Comb", "value": "-1.56"}
|
|
297
|
+
]
|
|
298
|
+
}
|
|
299
|
+
}
|
|
300
|
+
```
|
|
301
|
+
|
|
302
|
+
## Quality Checklist
|
|
303
|
+
|
|
304
|
+
Before outputting, verify:
|
|
305
|
+
- [ ] Title and caption captured verbatim
|
|
306
|
+
- [ ] ALL row labels listed (or noted as truncated)
|
|
307
|
+
- [ ] ALL column labels listed (or noted as truncated)
|
|
308
|
+
- [ ] Color scale complete (min/max values AND colors)
|
|
309
|
+
- [ ] Scale bar tick values exact
|
|
310
|
+
- [ ] Annotation bars documented with colors
|
|
311
|
+
- [ ] Dendrograms noted if present
|
|
312
|
+
- [ ] Cell values extracted if visible
|
|
313
|
+
- [ ] `all_visible_text` comprehensive
|
|
314
|
+
|
|
315
|
+
## Output Rules
|
|
316
|
+
|
|
317
|
+
1. Return ONLY the JSON object
|
|
318
|
+
2. No markdown code fences
|
|
319
|
+
3. No explanatory text
|
|
320
|
+
4. All text values verbatim from image
|
|
321
|
+
5. Use `null` for missing optional fields
|
|
322
|
+
6. If labels are too numerous, extract all you can see and note `truncated: true`
|