recombinase 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- recombinase-0.1.0/.gitignore +13 -0
- recombinase-0.1.0/LICENSE +21 -0
- recombinase-0.1.0/PKG-INFO +198 -0
- recombinase-0.1.0/README.md +168 -0
- recombinase-0.1.0/examples/consultant.example.yaml +46 -0
- recombinase-0.1.0/examples/template-config.example.yaml +33 -0
- recombinase-0.1.0/pyproject.toml +53 -0
- recombinase-0.1.0/src/recombinase/__init__.py +21 -0
- recombinase-0.1.0/src/recombinase/cli.py +178 -0
- recombinase-0.1.0/src/recombinase/config.py +134 -0
- recombinase-0.1.0/src/recombinase/generate.py +222 -0
- recombinase-0.1.0/src/recombinase/inspect.py +143 -0
- recombinase-0.1.0/tests/test_end_to_end.py +362 -0
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Terry Li
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,198 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: recombinase
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Template-guided document synthesis: extract structured content from source pptx files and recombine into a canonical template
|
|
5
|
+
Project-URL: Homepage, https://github.com/terry-li-hm/recombinase
|
|
6
|
+
Project-URL: Issues, https://github.com/terry-li-hm/recombinase/issues
|
|
7
|
+
Author: Terry Li
|
|
8
|
+
License-Expression: MIT
|
|
9
|
+
License-File: LICENSE
|
|
10
|
+
Keywords: credentials,cv,extraction,generation,powerpoint,pptx,template
|
|
11
|
+
Classifier: Development Status :: 3 - Alpha
|
|
12
|
+
Classifier: Intended Audience :: End Users/Desktop
|
|
13
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
14
|
+
Classifier: Operating System :: OS Independent
|
|
15
|
+
Classifier: Programming Language :: Python :: 3
|
|
16
|
+
Classifier: Programming Language :: Python :: 3 :: Only
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
20
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
21
|
+
Classifier: Topic :: Office/Business
|
|
22
|
+
Classifier: Topic :: Text Processing
|
|
23
|
+
Requires-Python: >=3.10
|
|
24
|
+
Requires-Dist: python-pptx>=0.6.23
|
|
25
|
+
Requires-Dist: pyyaml>=6.0
|
|
26
|
+
Provides-Extra: dev
|
|
27
|
+
Requires-Dist: pytest-cov>=4.0; extra == 'dev'
|
|
28
|
+
Requires-Dist: pytest>=7.0; extra == 'dev'
|
|
29
|
+
Description-Content-Type: text/markdown
|
|
30
|
+
|
|
31
|
+
# recombinase
|
|
32
|
+
|
|
33
|
+
> Biology: a recombinase is an enzyme that extracts DNA fragments and recombines them into new molecules using a homologous template as the structural guide. This package does the same for PowerPoint documents.
|
|
34
|
+
|
|
35
|
+
Template-guided pptx synthesis. Take a styled "filled example" slide in a `.pptx`/`.pptm` template, a folder of per-record YAML data files, and produce a populated output deck — one slide per record, visually identical to the template because the fill operation duplicates the source slide and replaces text in-place by shape name.
|
|
36
|
+
|
|
37
|
+
## Install
|
|
38
|
+
|
|
39
|
+
```
|
|
40
|
+
pip install --user recombinase
|
|
41
|
+
```
|
|
42
|
+
|
|
43
|
+
Or from source:
|
|
44
|
+
|
|
45
|
+
```
|
|
46
|
+
git clone https://github.com/terry-li-hm/recombinase.git
|
|
47
|
+
cd recombinase
|
|
48
|
+
pip install --user -e .
|
|
49
|
+
```
|
|
50
|
+
|
|
51
|
+
Dependencies: `python-pptx`, `pyyaml`. That's it.
|
|
52
|
+
|
|
53
|
+
## Concepts
|
|
54
|
+
|
|
55
|
+
**Three steps**, loosely coupled by file:
|
|
56
|
+
|
|
57
|
+
1. **Template** — a `.pptx`/`.pptm` file with at least one slide where every field you want to populate is a named shape (e.g. a text box named `Consultant_Name`).
|
|
58
|
+
2. **Config** — a small YAML file declaring which shape name on the template corresponds to which data field. One config per template. Templates change; configs go with them.
|
|
59
|
+
3. **Data** — a directory of per-record YAML files, one file per record (e.g. one per consultant). Each file has a flat map of field names to values. List values become bullet paragraphs automatically.
|
|
60
|
+
|
|
61
|
+
The template config is intentionally per-template rather than hardcoded in the package. Same package, different config → different template. You can build a CV pack, a use-case slide deck, or a client case study collection with the same tool and three different configs.
|
|
62
|
+
|
|
63
|
+
## Usage
|
|
64
|
+
|
|
65
|
+
### 1. Inspect a template
|
|
66
|
+
|
|
67
|
+
Discover the shape names on each slide — structural metadata only, never the actual text content. Safe to share the output.
|
|
68
|
+
|
|
69
|
+
```
|
|
70
|
+
recombinase inspect "path/to/template.pptm"
|
|
71
|
+
```
|
|
72
|
+
|
|
73
|
+
Example output:
|
|
74
|
+
|
|
75
|
+
```
|
|
76
|
+
File: /path/to/template.pptm
|
|
77
|
+
Slide count: 1
|
|
78
|
+
|
|
79
|
+
=== Slide 1 (layout: 'Blank') ===
|
|
80
|
+
- 'Consultant_Name' | type=TEXT_BOX (17) | text_chars=12 | paras=1, runs=1
|
|
81
|
+
- 'Role_Title' | type=TEXT_BOX (17) | text_chars=18 | paras=1, runs=1
|
|
82
|
+
- 'Summary_Body' | type=TEXT_BOX (17) | text_chars=140 | paras=2, runs=3
|
|
83
|
+
- 'Background_Bullets' | type=TEXT_BOX (17) | text_chars=220 | paras=5, runs=5
|
|
84
|
+
```
|
|
85
|
+
|
|
86
|
+
### 2. Scaffold a config
|
|
87
|
+
|
|
88
|
+
Generate a starter config file from the template's shape names:
|
|
89
|
+
|
|
90
|
+
```
|
|
91
|
+
recombinase init "path/to/template.pptm" --output template-config.yaml
|
|
92
|
+
```
|
|
93
|
+
|
|
94
|
+
This reads the shape names from slide 1 and writes a config like:
|
|
95
|
+
|
|
96
|
+
```yaml
|
|
97
|
+
template: /path/to/template.pptm
|
|
98
|
+
source_slide_index: 1
|
|
99
|
+
clear_source_slide: true
|
|
100
|
+
|
|
101
|
+
placeholders:
|
|
102
|
+
consultant_name: Consultant_Name
|
|
103
|
+
role_title: Role_Title
|
|
104
|
+
summary_body: Summary_Body
|
|
105
|
+
background_bullets: Background_Bullets
|
|
106
|
+
```
|
|
107
|
+
|
|
108
|
+
Edit the left side (data field names) to match how your records are keyed. For example, if your YAML data files use `name:` not `consultant_name:`, rename the left side:
|
|
109
|
+
|
|
110
|
+
```yaml
|
|
111
|
+
placeholders:
|
|
112
|
+
name: Consultant_Name
|
|
113
|
+
role: Role_Title
|
|
114
|
+
summary: Summary_Body
|
|
115
|
+
background: Background_Bullets
|
|
116
|
+
```
|
|
117
|
+
|
|
118
|
+
### 3. Write per-record data files
|
|
119
|
+
|
|
120
|
+
Create a directory with one YAML file per record. Records are added to the output deck in filename sort order:
|
|
121
|
+
|
|
122
|
+
```
|
|
123
|
+
cv-data/
|
|
124
|
+
├── 01-jane-doe.yaml
|
|
125
|
+
├── 02-john-smith.yaml
|
|
126
|
+
└── 03-alice-wong.yaml
|
|
127
|
+
```
|
|
128
|
+
|
|
129
|
+
Each file is a flat map — lists become bullet paragraphs:
|
|
130
|
+
|
|
131
|
+
```yaml
|
|
132
|
+
id: jane-doe
|
|
133
|
+
name: Jane Doe
|
|
134
|
+
role: Senior Consultant
|
|
135
|
+
summary: >-
|
|
136
|
+
Twelve years across global wealth management with a focus on
|
|
137
|
+
regulatory data and risk modelling.
|
|
138
|
+
background:
|
|
139
|
+
- Bank A — Risk modelling lead (2010-2015)
|
|
140
|
+
- Bank B — Head of analytics (2015-2020)
|
|
141
|
+
- Bank C — CDO, Asia Pacific (2020-present)
|
|
142
|
+
key_skills:
|
|
143
|
+
- Risk modelling
|
|
144
|
+
- Governance
|
|
145
|
+
- Wealth data architecture
|
|
146
|
+
```
|
|
147
|
+
|
|
148
|
+
The field names on the left must match the keys in your template config's `placeholders:` section.
|
|
149
|
+
|
|
150
|
+
### 4. Generate the output deck
|
|
151
|
+
|
|
152
|
+
```
|
|
153
|
+
recombinase generate \
|
|
154
|
+
--config template-config.yaml \
|
|
155
|
+
--data-dir cv-data/ \
|
|
156
|
+
--output output/deck.pptx
|
|
157
|
+
```
|
|
158
|
+
|
|
159
|
+
Produces a populated pptx with one slide per YAML file. If `clear_source_slide: true` is set in the config, the original example slide is removed from the output.
|
|
160
|
+
|
|
161
|
+
### One-line end-to-end
|
|
162
|
+
|
|
163
|
+
After the config exists:
|
|
164
|
+
|
|
165
|
+
```
|
|
166
|
+
recombinase generate -c template-config.yaml -d cv-data/ -o out.pptx
|
|
167
|
+
```
|
|
168
|
+
|
|
169
|
+
## Design notes
|
|
170
|
+
|
|
171
|
+
### Why duplicate a filled example slide?
|
|
172
|
+
|
|
173
|
+
The alternative is creating slides from a layout and writing text into empty placeholders. That approach loses any hand-tweaks the template designer made (custom colours, tweaked positions, decorative shapes, non-placeholder elements). Duplicating a known-good filled slide inherits 100% of its visual styling by design — `deepcopy` of the shape tree carries every property.
|
|
174
|
+
|
|
175
|
+
Trade-off: the template must contain one "canonical good example" slide to clone from. This is usually natural for CV templates and pack-prep work.
|
|
176
|
+
|
|
177
|
+
### Rich text and the flattening caveat
|
|
178
|
+
|
|
179
|
+
When a value is written into a shape with `shape.text_frame.text = "..."`, rich-text runs within that shape (bold name, italic subtitle in one text frame) collapse to the placeholder's default run style. For most modern consulting templates this isn't an issue — each styled fragment lives in its own shape. If your template has a multi-run placeholder, either split it into separate shapes or accept the flattening.
|
|
180
|
+
|
|
181
|
+
### Variable-length lists
|
|
182
|
+
|
|
183
|
+
List values in the YAML data become separate paragraphs in the target text frame, inheriting the placeholder's paragraph-level bullet formatting automatically. No bullet markers in the source data — the template supplies them. A consultant with three background bullets and another with seven both work without any config change.
|
|
184
|
+
|
|
185
|
+
### Warnings, not errors
|
|
186
|
+
|
|
187
|
+
If a config references a shape name that doesn't exist, or a record is missing a field, `generate` produces a **warning** but continues. This is deliberate: partial output is more useful than total failure during iteration. Pass `--strict` if you want a non-zero exit code when warnings occur.
|
|
188
|
+
|
|
189
|
+
## Scope (v0.1)
|
|
190
|
+
|
|
191
|
+
- ✓ Inspect: print template structural metadata
|
|
192
|
+
- ✓ Init: scaffold a config from shape names
|
|
193
|
+
- ✓ Generate: populate template from YAML records
|
|
194
|
+
- ✗ Extract: reverse direction (pptx → YAML) — planned for v0.2; a sample source file is needed to establish the structure before this can be implemented reliably
|
|
195
|
+
|
|
196
|
+
## License
|
|
197
|
+
|
|
198
|
+
MIT
|
|
@@ -0,0 +1,168 @@
|
|
|
1
|
+
# recombinase
|
|
2
|
+
|
|
3
|
+
> Biology: a recombinase is an enzyme that extracts DNA fragments and recombines them into new molecules using a homologous template as the structural guide. This package does the same for PowerPoint documents.
|
|
4
|
+
|
|
5
|
+
Template-guided pptx synthesis. Take a styled "filled example" slide in a `.pptx`/`.pptm` template, a folder of per-record YAML data files, and produce a populated output deck — one slide per record, visually identical to the template because the fill operation duplicates the source slide and replaces text in-place by shape name.
|
|
6
|
+
|
|
7
|
+
## Install
|
|
8
|
+
|
|
9
|
+
```
|
|
10
|
+
pip install --user recombinase
|
|
11
|
+
```
|
|
12
|
+
|
|
13
|
+
Or from source:
|
|
14
|
+
|
|
15
|
+
```
|
|
16
|
+
git clone https://github.com/terry-li-hm/recombinase.git
|
|
17
|
+
cd recombinase
|
|
18
|
+
pip install --user -e .
|
|
19
|
+
```
|
|
20
|
+
|
|
21
|
+
Dependencies: `python-pptx`, `pyyaml`. That's it.
|
|
22
|
+
|
|
23
|
+
## Concepts
|
|
24
|
+
|
|
25
|
+
**Three steps**, loosely coupled by file:
|
|
26
|
+
|
|
27
|
+
1. **Template** — a `.pptx`/`.pptm` file with at least one slide where every field you want to populate is a named shape (e.g. a text box named `Consultant_Name`).
|
|
28
|
+
2. **Config** — a small YAML file declaring which shape name on the template corresponds to which data field. One config per template. Templates change; configs go with them.
|
|
29
|
+
3. **Data** — a directory of per-record YAML files, one file per record (e.g. one per consultant). Each file has a flat map of field names to values. List values become bullet paragraphs automatically.
|
|
30
|
+
|
|
31
|
+
The template config is intentionally per-template rather than hardcoded in the package. Same package, different config → different template. You can build a CV pack, a use-case slide deck, or a client case study collection with the same tool and three different configs.
|
|
32
|
+
|
|
33
|
+
## Usage
|
|
34
|
+
|
|
35
|
+
### 1. Inspect a template
|
|
36
|
+
|
|
37
|
+
Discover the shape names on each slide — structural metadata only, never the actual text content. Safe to share the output.
|
|
38
|
+
|
|
39
|
+
```
|
|
40
|
+
recombinase inspect "path/to/template.pptm"
|
|
41
|
+
```
|
|
42
|
+
|
|
43
|
+
Example output:
|
|
44
|
+
|
|
45
|
+
```
|
|
46
|
+
File: /path/to/template.pptm
|
|
47
|
+
Slide count: 1
|
|
48
|
+
|
|
49
|
+
=== Slide 1 (layout: 'Blank') ===
|
|
50
|
+
- 'Consultant_Name' | type=TEXT_BOX (17) | text_chars=12 | paras=1, runs=1
|
|
51
|
+
- 'Role_Title' | type=TEXT_BOX (17) | text_chars=18 | paras=1, runs=1
|
|
52
|
+
- 'Summary_Body' | type=TEXT_BOX (17) | text_chars=140 | paras=2, runs=3
|
|
53
|
+
- 'Background_Bullets' | type=TEXT_BOX (17) | text_chars=220 | paras=5, runs=5
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
### 2. Scaffold a config
|
|
57
|
+
|
|
58
|
+
Generate a starter config file from the template's shape names:
|
|
59
|
+
|
|
60
|
+
```
|
|
61
|
+
recombinase init "path/to/template.pptm" --output template-config.yaml
|
|
62
|
+
```
|
|
63
|
+
|
|
64
|
+
This reads the shape names from slide 1 and writes a config like:
|
|
65
|
+
|
|
66
|
+
```yaml
|
|
67
|
+
template: /path/to/template.pptm
|
|
68
|
+
source_slide_index: 1
|
|
69
|
+
clear_source_slide: true
|
|
70
|
+
|
|
71
|
+
placeholders:
|
|
72
|
+
consultant_name: Consultant_Name
|
|
73
|
+
role_title: Role_Title
|
|
74
|
+
summary_body: Summary_Body
|
|
75
|
+
background_bullets: Background_Bullets
|
|
76
|
+
```
|
|
77
|
+
|
|
78
|
+
Edit the left side (data field names) to match how your records are keyed. For example, if your YAML data files use `name:` not `consultant_name:`, rename the left side:
|
|
79
|
+
|
|
80
|
+
```yaml
|
|
81
|
+
placeholders:
|
|
82
|
+
name: Consultant_Name
|
|
83
|
+
role: Role_Title
|
|
84
|
+
summary: Summary_Body
|
|
85
|
+
background: Background_Bullets
|
|
86
|
+
```
|
|
87
|
+
|
|
88
|
+
### 3. Write per-record data files
|
|
89
|
+
|
|
90
|
+
Create a directory with one YAML file per record. Records are added to the output deck in filename sort order:
|
|
91
|
+
|
|
92
|
+
```
|
|
93
|
+
cv-data/
|
|
94
|
+
├── 01-jane-doe.yaml
|
|
95
|
+
├── 02-john-smith.yaml
|
|
96
|
+
└── 03-alice-wong.yaml
|
|
97
|
+
```
|
|
98
|
+
|
|
99
|
+
Each file is a flat map — lists become bullet paragraphs:
|
|
100
|
+
|
|
101
|
+
```yaml
|
|
102
|
+
id: jane-doe
|
|
103
|
+
name: Jane Doe
|
|
104
|
+
role: Senior Consultant
|
|
105
|
+
summary: >-
|
|
106
|
+
Twelve years across global wealth management with a focus on
|
|
107
|
+
regulatory data and risk modelling.
|
|
108
|
+
background:
|
|
109
|
+
- Bank A — Risk modelling lead (2010-2015)
|
|
110
|
+
- Bank B — Head of analytics (2015-2020)
|
|
111
|
+
- Bank C — CDO, Asia Pacific (2020-present)
|
|
112
|
+
key_skills:
|
|
113
|
+
- Risk modelling
|
|
114
|
+
- Governance
|
|
115
|
+
- Wealth data architecture
|
|
116
|
+
```
|
|
117
|
+
|
|
118
|
+
The field names on the left must match the keys in your template config's `placeholders:` section.
|
|
119
|
+
|
|
120
|
+
### 4. Generate the output deck
|
|
121
|
+
|
|
122
|
+
```
|
|
123
|
+
recombinase generate \
|
|
124
|
+
--config template-config.yaml \
|
|
125
|
+
--data-dir cv-data/ \
|
|
126
|
+
--output output/deck.pptx
|
|
127
|
+
```
|
|
128
|
+
|
|
129
|
+
Produces a populated pptx with one slide per YAML file. If `clear_source_slide: true` is set in the config, the original example slide is removed from the output.
|
|
130
|
+
|
|
131
|
+
### One-line end-to-end
|
|
132
|
+
|
|
133
|
+
After the config exists:
|
|
134
|
+
|
|
135
|
+
```
|
|
136
|
+
recombinase generate -c template-config.yaml -d cv-data/ -o out.pptx
|
|
137
|
+
```
|
|
138
|
+
|
|
139
|
+
## Design notes
|
|
140
|
+
|
|
141
|
+
### Why duplicate a filled example slide?
|
|
142
|
+
|
|
143
|
+
The alternative is creating slides from a layout and writing text into empty placeholders. That approach loses any hand-tweaks the template designer made (custom colours, tweaked positions, decorative shapes, non-placeholder elements). Duplicating a known-good filled slide inherits 100% of its visual styling by design — `deepcopy` of the shape tree carries every property.
|
|
144
|
+
|
|
145
|
+
Trade-off: the template must contain one "canonical good example" slide to clone from. This is usually natural for CV templates and pack-prep work.
|
|
146
|
+
|
|
147
|
+
### Rich text and the flattening caveat
|
|
148
|
+
|
|
149
|
+
When a value is written into a shape with `shape.text_frame.text = "..."`, rich-text runs within that shape (bold name, italic subtitle in one text frame) collapse to the placeholder's default run style. For most modern consulting templates this isn't an issue — each styled fragment lives in its own shape. If your template has a multi-run placeholder, either split it into separate shapes or accept the flattening.
|
|
150
|
+
|
|
151
|
+
### Variable-length lists
|
|
152
|
+
|
|
153
|
+
List values in the YAML data become separate paragraphs in the target text frame, inheriting the placeholder's paragraph-level bullet formatting automatically. No bullet markers in the source data — the template supplies them. A consultant with three background bullets and another with seven both work without any config change.
|
|
154
|
+
|
|
155
|
+
### Warnings, not errors
|
|
156
|
+
|
|
157
|
+
If a config references a shape name that doesn't exist, or a record is missing a field, `generate` produces a **warning** but continues. This is deliberate: partial output is more useful than total failure during iteration. Pass `--strict` if you want a non-zero exit code when warnings occur.
|
|
158
|
+
|
|
159
|
+
## Scope (v0.1)
|
|
160
|
+
|
|
161
|
+
- ✓ Inspect: print template structural metadata
|
|
162
|
+
- ✓ Init: scaffold a config from shape names
|
|
163
|
+
- ✓ Generate: populate template from YAML records
|
|
164
|
+
- ✗ Extract: reverse direction (pptx → YAML) — planned for v0.2; a sample source file is needed to establish the structure before this can be implemented reliably
|
|
165
|
+
|
|
166
|
+
## License
|
|
167
|
+
|
|
168
|
+
MIT
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
# Example per-record data file for recombinase.
|
|
2
|
+
#
|
|
3
|
+
# One file per record (e.g. one per consultant). File names become the sort
|
|
4
|
+
# order in the output deck, so prefix with 01-, 02-, etc. if you want a
|
|
5
|
+
# specific ordering.
|
|
6
|
+
#
|
|
7
|
+
# The field names on the LEFT must match the keys in your template config's
|
|
8
|
+
# `placeholders:` section. Values can be scalar strings, integers, or lists.
|
|
9
|
+
# Lists become separate paragraphs (bullet points) in the target shape,
|
|
10
|
+
# inheriting the template's paragraph-level bullet formatting automatically.
|
|
11
|
+
|
|
12
|
+
id: jane-doe
|
|
13
|
+
name: Jane Doe
|
|
14
|
+
role: Senior Consultant
|
|
15
|
+
years_experience: 12
|
|
16
|
+
|
|
17
|
+
summary: >-
|
|
18
|
+
Wealth-data specialist with twelve years across global banking and
|
|
19
|
+
consulting. Led data governance and AI strategy engagements at
|
|
20
|
+
tier-1 APAC financial institutions.
|
|
21
|
+
|
|
22
|
+
background:
|
|
23
|
+
- Bank A — Risk modelling lead (2010-2015)
|
|
24
|
+
- Bank B — Head of analytics (2015-2020)
|
|
25
|
+
- Bank C — Chief Data Officer, Asia Pacific (2020-present)
|
|
26
|
+
|
|
27
|
+
key_skills:
|
|
28
|
+
- Risk modelling and stress testing
|
|
29
|
+
- AI governance and model risk management
|
|
30
|
+
- Wealth data architecture
|
|
31
|
+
- Regulatory reporting automation
|
|
32
|
+
|
|
33
|
+
clients:
|
|
34
|
+
- HSBC
|
|
35
|
+
- UBS
|
|
36
|
+
- DBS
|
|
37
|
+
|
|
38
|
+
qualifications:
|
|
39
|
+
- CFA Charterholder
|
|
40
|
+
- FRM
|
|
41
|
+
- MSc Financial Engineering
|
|
42
|
+
|
|
43
|
+
languages:
|
|
44
|
+
- English (native)
|
|
45
|
+
- Cantonese (fluent)
|
|
46
|
+
- Mandarin (working)
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
# Example template config for recombinase.
|
|
2
|
+
#
|
|
3
|
+
# One config per pptx template. The `template:` path can be absolute or
|
|
4
|
+
# relative to this config file's directory. The `placeholders:` section
|
|
5
|
+
# maps data field names (left) to the shape .Name property in the template
|
|
6
|
+
# (right). You find the shape names by running `recombinase inspect` or
|
|
7
|
+
# `recombinase init` on the template first.
|
|
8
|
+
|
|
9
|
+
template: ./CV_template.pptx
|
|
10
|
+
|
|
11
|
+
# 1-based index of the slide inside the template that should be duplicated
|
|
12
|
+
# once per record. Usually this is the "filled example" slide that's already
|
|
13
|
+
# styled correctly.
|
|
14
|
+
source_slide_index: 1
|
|
15
|
+
|
|
16
|
+
# Remove the source (example) slide from the final output so only the
|
|
17
|
+
# generated per-record slides remain. Set to false if you want the example
|
|
18
|
+
# preserved as slide 1 of the output for reference.
|
|
19
|
+
clear_source_slide: true
|
|
20
|
+
|
|
21
|
+
# Map from data field name → shape .Name in the template.
|
|
22
|
+
# The LEFT side must match the keys in your per-record YAML data files.
|
|
23
|
+
# The RIGHT side must match the shape names reported by `recombinase inspect`.
|
|
24
|
+
placeholders:
|
|
25
|
+
name: Consultant_Name
|
|
26
|
+
role: Role_Title
|
|
27
|
+
years_experience: Years_Experience
|
|
28
|
+
summary: Summary_Body
|
|
29
|
+
background: Background_Bullets
|
|
30
|
+
key_skills: Key_Skills
|
|
31
|
+
clients: Clients_Served
|
|
32
|
+
qualifications: Qualifications
|
|
33
|
+
languages: Languages
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["hatchling"]
|
|
3
|
+
build-backend = "hatchling.build"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "recombinase"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "Template-guided document synthesis: extract structured content from source pptx files and recombine into a canonical template"
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
requires-python = ">=3.10"
|
|
11
|
+
license = "MIT"
|
|
12
|
+
authors = [
|
|
13
|
+
{ name = "Terry Li" },
|
|
14
|
+
]
|
|
15
|
+
keywords = ["pptx", "powerpoint", "template", "cv", "credentials", "extraction", "generation"]
|
|
16
|
+
classifiers = [
|
|
17
|
+
"Development Status :: 3 - Alpha",
|
|
18
|
+
"Intended Audience :: End Users/Desktop",
|
|
19
|
+
"License :: OSI Approved :: MIT License",
|
|
20
|
+
"Operating System :: OS Independent",
|
|
21
|
+
"Programming Language :: Python :: 3",
|
|
22
|
+
"Programming Language :: Python :: 3 :: Only",
|
|
23
|
+
"Programming Language :: Python :: 3.10",
|
|
24
|
+
"Programming Language :: Python :: 3.11",
|
|
25
|
+
"Programming Language :: Python :: 3.12",
|
|
26
|
+
"Programming Language :: Python :: 3.13",
|
|
27
|
+
"Topic :: Office/Business",
|
|
28
|
+
"Topic :: Text Processing",
|
|
29
|
+
]
|
|
30
|
+
dependencies = [
|
|
31
|
+
"python-pptx>=0.6.23",
|
|
32
|
+
"pyyaml>=6.0",
|
|
33
|
+
]
|
|
34
|
+
|
|
35
|
+
[project.optional-dependencies]
|
|
36
|
+
dev = [
|
|
37
|
+
"pytest>=7.0",
|
|
38
|
+
"pytest-cov>=4.0",
|
|
39
|
+
]
|
|
40
|
+
|
|
41
|
+
[project.scripts]
|
|
42
|
+
recombinase = "recombinase.cli:main"
|
|
43
|
+
|
|
44
|
+
[project.urls]
|
|
45
|
+
Homepage = "https://github.com/terry-li-hm/recombinase"
|
|
46
|
+
Issues = "https://github.com/terry-li-hm/recombinase/issues"
|
|
47
|
+
|
|
48
|
+
[tool.hatch.build.targets.wheel]
|
|
49
|
+
packages = ["src/recombinase"]
|
|
50
|
+
|
|
51
|
+
[tool.ruff]
|
|
52
|
+
target-version = "py310"
|
|
53
|
+
line-length = 100
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
"""recombinase — template-guided document synthesis.
|
|
2
|
+
|
|
3
|
+
Biology: a recombinase is an enzyme that extracts DNA fragments and recombines
|
|
4
|
+
them into new molecules using a homologous template as the structural guide.
|
|
5
|
+
This package does the same for PowerPoint documents: recombine structured
|
|
6
|
+
content into a canonical template (extraction from source files is planned).
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from recombinase.config import TemplateConfig, load_config
|
|
10
|
+
from recombinase.generate import generate_deck
|
|
11
|
+
from recombinase.inspect import inspect_template
|
|
12
|
+
|
|
13
|
+
__version__ = "0.1.0"
|
|
14
|
+
|
|
15
|
+
__all__ = [
|
|
16
|
+
"TemplateConfig",
|
|
17
|
+
"load_config",
|
|
18
|
+
"generate_deck",
|
|
19
|
+
"inspect_template",
|
|
20
|
+
"__version__",
|
|
21
|
+
]
|