pdform 0.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pdform-0.2.0/PKG-INFO +141 -0
- pdform-0.2.0/README.rst +123 -0
- pdform-0.2.0/pyproject.toml +29 -0
- pdform-0.2.0/src/pdform/__init__.py +0 -0
- pdform-0.2.0/src/pdform/__main__.py +3 -0
- pdform-0.2.0/src/pdform/cli.py +12 -0
- pdform-0.2.0/src/pdform/describe.py +124 -0
- pdform-0.2.0/src/pdform/fill_form.py +116 -0
- pdform-0.2.0/src/pdform/make_html/__init__.py +3 -0
- pdform-0.2.0/src/pdform/make_html/cli.py +26 -0
- pdform-0.2.0/src/pdform/make_html/field_renderer.py +225 -0
- pdform-0.2.0/src/pdform/make_html/make_html.py +112 -0
- pdform-0.2.0/src/pdform/make_html/process_form.py +164 -0
- pdform-0.2.0/src/pdform/make_html/template_soup.py +51 -0
pdform-0.2.0/PKG-INFO
ADDED
|
@@ -0,0 +1,141 @@
|
|
|
1
|
+
Metadata-Version: 2.3
|
|
2
|
+
Name: pdform
|
|
3
|
+
Version: 0.2.0
|
|
4
|
+
Summary: A library and command-line tool for working with PDF interactive forms. Uses pikepdf and pdf2htmlex.
|
|
5
|
+
Author: Dominick Johnson
|
|
6
|
+
Author-email: dominick.johnson@tylertech.com
|
|
7
|
+
Requires-Python: >=3.11
|
|
8
|
+
Classifier: Programming Language :: Python :: 3
|
|
9
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
10
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
11
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
12
|
+
Requires-Dist: beautifulsoup4 (>=4.13.4,<5.0.0)
|
|
13
|
+
Requires-Dist: click (>=8.2.0,<9.0.0)
|
|
14
|
+
Requires-Dist: pikepdf (>=9.8.0,<10.0.0)
|
|
15
|
+
Requires-Dist: pillow (>=11.2.1,<12.0.0)
|
|
16
|
+
Project-URL: Homepage, https://github.com/dmjohnsson23/pdform-pikpdf
|
|
17
|
+
Description-Content-Type: text/x-rst
|
|
18
|
+
|
|
19
|
+
======
|
|
20
|
+
PDForm
|
|
21
|
+
======
|
|
22
|
+
|
|
23
|
+
A library and command-line tool for working with PDF interactive forms. It can:
|
|
24
|
+
|
|
25
|
+
* Describe the available fields in the form
|
|
26
|
+
* Convert the PDF to an HTML form
|
|
27
|
+
* Populate the PDF form with data
|
|
28
|
+
|
|
29
|
+
Uses `Pikepdf <https://pikepdf.readthedocs.io/en/latest/index.html>`_.
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
----------------
|
|
33
|
+
Describing Forms
|
|
34
|
+
----------------
|
|
35
|
+
|
|
36
|
+
The `pdform describe` command can be used to get information about a PDF form, such as the names and types of fields, allowable options, and so forth. Use `pdform describe --help` for command-line options.
|
|
37
|
+
|
|
38
|
+
.. code-block:: shell
|
|
39
|
+
|
|
40
|
+
pdform describe form.pdf
|
|
41
|
+
|
|
42
|
+
By default, it will show every field in the form, together with all the relevant details about the field. Command-line options exist to filter this view for easier parsing.
|
|
43
|
+
|
|
44
|
+
.. code-block::
|
|
45
|
+
|
|
46
|
+
=========================================================================
|
|
47
|
+
stream <_io.BufferedReader name='../../pikepdf/tests/resources/form.pdf'>
|
|
48
|
+
=========================================================================
|
|
49
|
+
|
|
50
|
+
Text1
|
|
51
|
+
-----
|
|
52
|
+
|
|
53
|
+
Label:
|
|
54
|
+
Text1
|
|
55
|
+
|
|
56
|
+
Type:
|
|
57
|
+
TextField
|
|
58
|
+
|
|
59
|
+
Required:
|
|
60
|
+
No
|
|
61
|
+
|
|
62
|
+
Read Only:
|
|
63
|
+
No
|
|
64
|
+
|
|
65
|
+
Multiline:
|
|
66
|
+
No
|
|
67
|
+
|
|
68
|
+
Max Length:
|
|
69
|
+
None
|
|
70
|
+
|
|
71
|
+
Default Value:
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
Current Value:
|
|
75
|
+
|
|
76
|
+
... and so on ...
|
|
77
|
+
|
|
78
|
+
-------------
|
|
79
|
+
Filling Forms
|
|
80
|
+
-------------
|
|
81
|
+
|
|
82
|
+
Filling forms is done using the `pdform fill-form` command. Typically, this will be done using JSON-formatted data, such as:
|
|
83
|
+
|
|
84
|
+
.. code-block:: json
|
|
85
|
+
|
|
86
|
+
{
|
|
87
|
+
"TextField1": "Some Text",
|
|
88
|
+
"Checkbox1": true,
|
|
89
|
+
"RadioGroup1": "3",
|
|
90
|
+
"ChoiceField1": "Option 4",
|
|
91
|
+
"SignatureField": "/home/myself/signature.png"
|
|
92
|
+
}
|
|
93
|
+
|
|
94
|
+
You can then call the command with this JSON:
|
|
95
|
+
|
|
96
|
+
.. code-block:: shell
|
|
97
|
+
|
|
98
|
+
pdform fill-form template.pdf output.pdf data.json
|
|
99
|
+
|
|
100
|
+
Or pipe this JSON into the command:
|
|
101
|
+
|
|
102
|
+
.. code-block:: shell
|
|
103
|
+
|
|
104
|
+
echo {your json here} | pdform fill-form template.pdf output.pdf -
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
------------------
|
|
108
|
+
Converting to HTML
|
|
109
|
+
------------------
|
|
110
|
+
|
|
111
|
+
Converting to HTML relies on `pdf2htmlEX <https://pdf2htmlex.github.io/pdf2htmlEX/>`_ to generate the initial HTML. We then use `BeautifulSoup <https://beautiful-soup-4.readthedocs.io/en/latest/>`_ to strip away most of the unnecessary code, and add the form fields.
|
|
112
|
+
|
|
113
|
+
This function can be activated in one of two ways:
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
1. The `pdform make-html` command. Use `pdform make-html --help` for details.
|
|
117
|
+
2. Directly via Python.
|
|
118
|
+
|
|
119
|
+
The command-line interface is sufficient for basic usage. It provides a handful of different output formats: plain HTML, Jinja, and PHP.
|
|
120
|
+
|
|
121
|
+
.. code-block:: shell
|
|
122
|
+
|
|
123
|
+
pdform make-html --jinja input.pdf output.jinja
|
|
124
|
+
|
|
125
|
+
However, it is likely you may wish to customize the rendered HTML. The Python interface gives much more flexibility for this.
|
|
126
|
+
|
|
127
|
+
.. code-block:: python
|
|
128
|
+
|
|
129
|
+
from pdform.make_html import FieldRenderer, make_html
|
|
130
|
+
|
|
131
|
+
# Define your own field renderer to control the emitted code for form fields
|
|
132
|
+
class MyFieldRenderer(FieldRenderer):
|
|
133
|
+
# See the source code for details on how to implement this class
|
|
134
|
+
...
|
|
135
|
+
|
|
136
|
+
soup = make_html(path, field_renderer_class=MyFieldRenderer)
|
|
137
|
+
# Use the BeautifulSoup object to perform any post-processing to the generated HTML
|
|
138
|
+
# (See the BeautifulSoup documentation for how to use it to manipulate the DOM)
|
|
139
|
+
...
|
|
140
|
+
# Output the rendered HTML
|
|
141
|
+
print(soup.prettify())
|
pdform-0.2.0/README.rst
ADDED
|
@@ -0,0 +1,123 @@
|
|
|
1
|
+
======
|
|
2
|
+
PDForm
|
|
3
|
+
======
|
|
4
|
+
|
|
5
|
+
A library and command-line tool for working with PDF interactive forms. It can:
|
|
6
|
+
|
|
7
|
+
* Describe the available fields in the form
|
|
8
|
+
* Convert the PDF to an HTML form
|
|
9
|
+
* Populate the PDF form with data
|
|
10
|
+
|
|
11
|
+
Uses `Pikepdf <https://pikepdf.readthedocs.io/en/latest/index.html>`_.
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
----------------
|
|
15
|
+
Describing Forms
|
|
16
|
+
----------------
|
|
17
|
+
|
|
18
|
+
The `pdform describe` command can be used to get information about a PDF form, such as the names and types of fields, allowable options, and so forth. Use `pdform describe --help` for command-line options.
|
|
19
|
+
|
|
20
|
+
.. code-block:: shell
|
|
21
|
+
|
|
22
|
+
pdform describe form.pdf
|
|
23
|
+
|
|
24
|
+
By default, it will show every field in the form, together with all the relevant details about the field. Command-line options exist to filter this view for easier parsing.
|
|
25
|
+
|
|
26
|
+
.. code-block::
|
|
27
|
+
|
|
28
|
+
=========================================================================
|
|
29
|
+
stream <_io.BufferedReader name='../../pikepdf/tests/resources/form.pdf'>
|
|
30
|
+
=========================================================================
|
|
31
|
+
|
|
32
|
+
Text1
|
|
33
|
+
-----
|
|
34
|
+
|
|
35
|
+
Label:
|
|
36
|
+
Text1
|
|
37
|
+
|
|
38
|
+
Type:
|
|
39
|
+
TextField
|
|
40
|
+
|
|
41
|
+
Required:
|
|
42
|
+
No
|
|
43
|
+
|
|
44
|
+
Read Only:
|
|
45
|
+
No
|
|
46
|
+
|
|
47
|
+
Multiline:
|
|
48
|
+
No
|
|
49
|
+
|
|
50
|
+
Max Length:
|
|
51
|
+
None
|
|
52
|
+
|
|
53
|
+
Default Value:
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
Current Value:
|
|
57
|
+
|
|
58
|
+
... and so on ...
|
|
59
|
+
|
|
60
|
+
-------------
|
|
61
|
+
Filling Forms
|
|
62
|
+
-------------
|
|
63
|
+
|
|
64
|
+
Filling forms is done using the `pdform fill-form` command. Typically, this will be done using JSON-formatted data, such as:
|
|
65
|
+
|
|
66
|
+
.. code-block:: json
|
|
67
|
+
|
|
68
|
+
{
|
|
69
|
+
"TextField1": "Some Text",
|
|
70
|
+
"Checkbox1": true,
|
|
71
|
+
"RadioGroup1": "3",
|
|
72
|
+
"ChoiceField1": "Option 4",
|
|
73
|
+
"SignatureField": "/home/myself/signature.png"
|
|
74
|
+
}
|
|
75
|
+
|
|
76
|
+
You can then call the command with this JSON:
|
|
77
|
+
|
|
78
|
+
.. code-block:: shell
|
|
79
|
+
|
|
80
|
+
pdform fill-form template.pdf output.pdf data.json
|
|
81
|
+
|
|
82
|
+
Or pipe this JSON into the command:
|
|
83
|
+
|
|
84
|
+
.. code-block:: shell
|
|
85
|
+
|
|
86
|
+
echo {your json here} | pdform fill-form template.pdf output.pdf -
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
------------------
|
|
90
|
+
Converting to HTML
|
|
91
|
+
------------------
|
|
92
|
+
|
|
93
|
+
Converting to HTML relies on `pdf2htmlEX <https://pdf2htmlex.github.io/pdf2htmlEX/>`_ to generate the initial HTML. We then use `BeautifulSoup <https://beautiful-soup-4.readthedocs.io/en/latest/>`_ to strip away most of the unnecessary code, and add the form fields.
|
|
94
|
+
|
|
95
|
+
This function can be activated in one of two ways:
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
1. The `pdform make-html` command. Use `pdform make-html --help` for details.
|
|
99
|
+
2. Directly via Python.
|
|
100
|
+
|
|
101
|
+
The command-line interface is sufficient for basic usage. It provides a handful of different output formats: plain HTML, Jinja, and PHP.
|
|
102
|
+
|
|
103
|
+
.. code-block:: shell
|
|
104
|
+
|
|
105
|
+
pdform make-html --jinja input.pdf output.jinja
|
|
106
|
+
|
|
107
|
+
However, it is likely you may wish to customize the rendered HTML. The Python interface gives much more flexibility for this.
|
|
108
|
+
|
|
109
|
+
.. code-block:: python
|
|
110
|
+
|
|
111
|
+
from pdform.make_html import FieldRenderer, make_html
|
|
112
|
+
|
|
113
|
+
# Define your own field renderer to control the emitted code for form fields
|
|
114
|
+
class MyFieldRenderer(FieldRenderer):
|
|
115
|
+
# See the source code for details on how to implement this class
|
|
116
|
+
...
|
|
117
|
+
|
|
118
|
+
soup = make_html(path, field_renderer_class=MyFieldRenderer)
|
|
119
|
+
# Use the BeautifulSoup object to perform any post-processing to the generated HTML
|
|
120
|
+
# (See the BeautifulSoup documentation for how to use it to manipulate the DOM)
|
|
121
|
+
...
|
|
122
|
+
# Output the rendered HTML
|
|
123
|
+
print(soup.prettify())
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "pdform"
|
|
3
|
+
version = "0.2.0"
|
|
4
|
+
description = "A library and command-line tool for working with PDF interactive forms. Uses pikepdf and pdf2htmlex."
|
|
5
|
+
authors = [
|
|
6
|
+
{name = "Dominick Johnson",email = "dominick.johnson@tylertech.com"}
|
|
7
|
+
]
|
|
8
|
+
readme = "README.rst"
|
|
9
|
+
requires-python = ">=3.11"
|
|
10
|
+
dependencies = [
|
|
11
|
+
"pikepdf (>=9.8.0,<10.0.0)",
|
|
12
|
+
"click (>=8.2.0,<9.0.0)",
|
|
13
|
+
"pillow (>=11.2.1,<12.0.0)",
|
|
14
|
+
"beautifulsoup4 (>=4.13.4,<5.0.0)"
|
|
15
|
+
]
|
|
16
|
+
|
|
17
|
+
[tool.poetry]
|
|
18
|
+
packages = [{include = "pdform", from = "src"}]
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
[build-system]
|
|
22
|
+
requires = ["poetry-core>=2.0.0,<3.0.0"]
|
|
23
|
+
build-backend = "poetry.core.masonry.api"
|
|
24
|
+
|
|
25
|
+
[tool.poetry.scripts]
|
|
26
|
+
pdform = 'pdform.cli:cli'
|
|
27
|
+
|
|
28
|
+
[project.urls]
|
|
29
|
+
Homepage = "https://github.com/dmjohnsson23/pdform-pikpdf"
|
|
File without changes
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
import click
|
|
2
|
+
from .make_html.cli import cli as make_html
|
|
3
|
+
from .describe import describe
|
|
4
|
+
from .fill_form import cli as fill_form
|
|
5
|
+
|
|
6
|
+
@click.group()
def cli():
    # Root command group. Deliberately left without a docstring so that the
    # help text click generates for the group stays exactly as before.
    pass


# Attach each sub-command to the root group, in the original order.
for _command in (make_html, describe, fill_form):
    cli.add_command(_command)
|
|
@@ -0,0 +1,124 @@
|
|
|
1
|
+
import click
|
|
2
|
+
from pikepdf import Pdf
|
|
3
|
+
from pikepdf.form import Form, TextField, CheckboxField, RadioButtonGroup, ChoiceField, SignatureField
|
|
4
|
+
import re
|
|
5
|
+
|
|
6
|
+
@click.command
@click.argument('path', type=click.File('rb'))
@click.option('--text', 'filter_types', help='Show text fields. (If no type filters are provided, all will be implied true)', multiple=True, flag_value='text')
@click.option('--checkbox', 'filter_types', help='Show checkbox fields. (If no type filters are provided, all will be implied true)', multiple=True, flag_value='checkbox')
@click.option('--radio', 'filter_types', help='Show radio fields. (If no type filters are provided, all will be implied true)', multiple=True, flag_value='radio')
@click.option('--choice', 'filter_types', help='Show choice fields. (If no type filters are provided, all will be implied true)', multiple=True, flag_value='choice')
@click.option('--signature', 'filter_types', help='Show signature fields. (If no type filters are provided, all will be implied true)', multiple=True, flag_value='signature')
@click.option('--name', '-n', 'filter_name', help='Show only fields with a name containing the given string. Use slashes to build a regex.', multiple=True, type=click.STRING)
@click.option('--label', '-l', 'filter_label', help='Show only fields with a label containing the given string. Use slashes to build a regex.',multiple=True, type=click.STRING)
@click.option('--names-only/--full-info', 'names_only', help='Determines if detailed information should be shown, or only field names.', default=False)
def describe(path, filter_types, filter_name, filter_label, names_only):
    """
    Describe the fields in a form
    """
    # NOTE: the docstring above is what click shows as the command help, so it
    # is kept verbatim; the implementation notes live in comments instead.

    # Field class -> name of the command-line flag that selects it.
    type_flags = (
        (TextField, 'text'),
        (CheckboxField, 'checkbox'),
        (RadioButtonGroup, 'radio'),
        (ChoiceField, 'choice'),
        (SignatureField, 'signature'),
    )

    def yes_no(flag):
        return 'Yes' if flag else 'No'

    def show(heading, text):
        # One "heading / indented value / blank line" section.
        click.secho(heading, fg='cyan')
        click.echo("\t" + text)
        click.echo()

    def show_list(heading, items):
        # Same as show(), but with one bullet line per item.
        click.secho(heading, fg='cyan')
        for item in items:
            click.echo("\t* " + str(item))
        click.echo()

    shown_any = False
    with Pdf.open(path) as pdf:
        form = Form(pdf)

        # Prefer the document title from the metadata; fall back to the filename.
        with pdf.open_metadata() as meta:
            title = meta.get('dc:title', pdf.filename)
        rule = '=' * len(title)
        for banner_line in (rule, title, rule):
            click.secho(banner_line, reverse=True)
        click.echo()

        if not form.exists:
            click.secho("No interactive form exists in this document.", fg='yellow')
            return

        for name, field in form.items():
            # Type filters: skip a field whose type was not asked for. Fields
            # of a type not listed in type_flags are never filtered out here.
            if filter_types and any(
                isinstance(field, field_cls) and flag not in filter_types
                for field_cls, flag in type_flags
            ):
                continue
            if filter_name and not filter_match(filter_name, name):
                continue
            if filter_label and not filter_match(filter_label, field.alternate_name):
                continue

            shown_any = True

            if names_only:
                click.echo(name)
                continue

            click.secho(name, reverse=True, fg='cyan')
            click.secho('-' * len(name), reverse=True, fg='cyan')
            click.echo()
            show('Label:', field.alternate_name)
            show('Type:', type(field).__name__)
            show('Required:', yes_no(field.is_required))
            show('Read Only:', yes_no(field.is_read_only))

            # Type-specific details.
            if isinstance(field, TextField):
                show('Multiline:', yes_no(field.is_multiline))
                show('Max Length:', str(field.max_length))
            elif isinstance(field, CheckboxField):
                show('"On" Value:', str(field.on_value))
            elif isinstance(field, RadioButtonGroup):
                show('Can Toggle Off:', yes_no(field.can_toggle_off))
                show_list('Possible Values:', (option.on_value for option in field.options))
            elif isinstance(field, ChoiceField):
                show_list('Possible Values:', (option.display_value for option in field.options))

            show('Default Value:', str(field.default_value))
            show('Current Value:', str(field.value))

    if not shown_any:
        click.secho("No fields match the given criteria.", fg='yellow')
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
def filter_match(filters, match_against):
    """
    Check whether a string satisfies at least one of the given filters.

    Each filter is either:

    * A regular expression wrapped in slashes (``/pattern/``), which is
      searched for in ``match_against`` with ``re.search``; or
    * Any other string, which is matched as a case-insensitive substring.

    :param filters: Iterable of filter strings.
    :param match_against: The string to test.
    :return: ``True`` if any filter matches, ``False`` otherwise (including
        when ``filters`` is empty).
    """
    for fl in filters:
        # A lone "/" cannot delimit a pattern, so it is treated as a literal.
        if len(fl) >= 2 and fl.startswith('/') and fl.endswith('/'):
            # Strip exactly one delimiter from each end. (The previous slice,
            # [1:-2], also cut off the last character of the pattern.)
            if re.search(fl[1:-1], match_against):
                return True
        elif fl.lower() in match_against.lower():
            return True
    return False
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
if __name__ == '__main__':
|
|
124
|
+
describe()
|
|
@@ -0,0 +1,116 @@
|
|
|
1
|
+
from io import BytesIO
|
|
2
|
+
from pikepdf import Name, Pdf, Page, Rectangle
|
|
3
|
+
from pikepdf.form import Form, TextField, CheckboxField, RadioButtonGroup, ChoiceField, SignatureField, ExtendedAppearanceStreamGenerator
|
|
4
|
+
from PIL import Image
|
|
5
|
+
import click
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
@click.command('fill-form', help='Populate the template with the provided data')
# PDF files are binary, so the template and the output are opened in binary
# mode (text mode would hand pikepdf a str-based stream).
@click.argument('template', type=click.File('rb'), required=True)
@click.argument('output', type=click.File('wb'), required=True)
@click.argument('data-file', type=click.File(), default='-')
@click.option('--data-format', help='The format of the data file.', type=click.Choice(('json',)), default='json')
@click.option('--set', '-s', 'cli_data', nargs=2, multiple=True, help='Set a field value in the form. Using this option causes the data file to be ignored.')
def cli(template, output, data_file, data_format, cli_data):
    """
    Command-line entry point: fill ``template`` with data and write ``output``.

    :param template: Open binary stream of the PDF form to fill.
    :param output: Open binary stream the filled PDF is saved to.
    :param data_file: Open text stream holding the field data (standard input
        by default). Ignored when ``cli_data`` is given.
    :param data_format: Format of ``data_file``; passed to ``parse_data``.
    :param cli_data: ``(name, value)`` pairs from ``--set``. When non-empty
        they are used as the form data instead of the data file.
    """
    if cli_data:
        data = dict(cli_data)
    else:
        data = parse_data(data_format, data_file)
    with Pdf.open(template) as pdf:
        fill_form(pdf, data)
        pdf.save(output)
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def parse_data(format, file):
    """
    Parse form data from an open file.

    :param format: Name of the data format. Only ``'json'`` is supported.
    :param file: Open file-like object to read the data from.
    :return: The parsed data.
    :raises ValueError: If ``format`` is not a supported format. (Previously
        this case silently returned ``None``.)
    """
    if format == 'json':
        from json import load
        return load(file)
    raise ValueError(f"Unsupported data format: {format!r}")
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def img_to_pdf(img) -> Pdf:
    """
    Convert an image to a PDF.

    The input image may be:

    * An open file-like object
    * A path
    * A base64 data URL

    :param img: The image to convert, in one of the forms listed above.
    :return: A ``Pdf`` opened from the in-memory PDF that the image was saved to.
    """
    if isinstance(img, str) and img.startswith('data:'):
        # embedded base64
        from base64 import b64decode
        # Everything after the first comma is the encoded payload. Decode it
        # and hand the bytes to PIL. (Previously this branch read an undefined
        # name and never used the decoded data.)
        _, _, payload = img.partition(',')
        img = BytesIO(b64decode(payload))
    # Open image and convert to RGB (Greyscale images cause issues)
    image = Image.open(img).convert('RGB')
    # Convert the image to a PDF
    pdf_img = BytesIO()
    image.save(pdf_img, 'pdf')
    pdf_img.seek(0)
    return Pdf.open(pdf_img)
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def stamp(img, page:Page, rect:Rectangle):
    """
    Overlay an image onto a page inside the given rectangle.

    :param img: The image to stamp. Can be a file path, open file object, or base64 data URL.
    :param page: The page that receives the overlay.
    :param rect: The box in which to place the image. The image will be scaled to fit.
    """
    # The image is first turned into a one-page PDF; that page is the overlay.
    image_pdf = img_to_pdf(img)
    with image_pdf as stamp_pdf:
        overlay_page = stamp_pdf.pages[0]
        page.add_overlay(overlay_page, rect)
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def fill_form(pdf:Pdf, data:dict):
    """
    Populate the form fields of a PDF from a dictionary of values.

    :param pdf: The PDF to populate with data
    :param data: The data to populate the form with. The keys of this dictionary should match
        the field's fully-qualified name. The values should be as follows:

        * For text fields, provide the value to set
        * For checkboxes, provide a boolean
        * For radio buttons, provide the value in the button's AP.N dictionary
        * For signature fields, provide the path to an image which will be stamped in its place
          (real cryptographic signatures are not supported)

        An optional ``'.stamps'`` key holds a list of extra stamps that are not tied to a
        field; each entry has ``'img'``, ``'page'`` (1-based) and ``'rect'`` items.
    """
    form = Form(pdf, ExtendedAppearanceStreamGenerator)
    for key, field in form.items():
        # Only touch fields that have a name and a non-null entry in the data.
        if not key or key not in data:
            continue
        value = data[key]
        if value is None:
            continue

        if isinstance(field, (TextField, ChoiceField)):
            field.value = value
        elif isinstance(field, CheckboxField):
            if value is True:
                field.checked = True
            elif value is None or value is False:
                field.checked = False
            else:
                # Anything else is taken as the name of the state to set.
                field.value = to_name(value)
        elif isinstance(field, RadioButtonGroup):
            field.value = to_name(value)
        elif isinstance(field, SignatureField):
            # Either a bare image reference, or a dict with the image and an
            # optional rectangle expansion.
            if isinstance(value, str):
                img, expand = value, None
            else:
                img, expand = value['img'], value.get('expand_rect')
            with img_to_pdf(img) as stamp_pdf:
                field.stamp_overlay(stamp_pdf.pages[0], expand_rect=expand)

    if '.stamps' not in data:
        return
    # Custom stamps not associated with fields
    for entry in data['.stamps']:
        image = entry['img']
        if not image:
            continue
        target_page = pdf.pages[entry['page'] - 1]
        stamp(image, target_page, Rectangle(*entry['rect']))
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
def to_name(value: str):
    """
    Build a pikepdf ``Name`` from a string, adding the leading slash if it is missing.

    :param value: The name, with or without its leading ``/``.
    :return: ``Name`` constructed from the slash-prefixed string.
    """
    prefixed = value if value.startswith('/') else f"/{value}"
    return Name(prefixed)
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
import click
|
|
2
|
+
from .make_html import make_html
|
|
3
|
+
from .field_renderer import FieldRenderer, PHPFieldRenderer, JinjaFieldRenderer
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
@click.command('make-html', help='Convert a PDF form into an HTML form')
@click.argument('path', type=click.Path(True, dir_okay=False))
@click.argument('output', type=click.File('w'))
@click.option('--pdf2html', help='Override the path used to call Pdf2HmlEX', default='pdf2htmlex')
@click.option('--zoom', help='The size at which to render the PDF into HTML', type=click.FloatRange(0, None, True), default=1)
@click.option('--sort-widgets/--original-widget-sorting', help='Attempt to re-sort widgets based on their location on the page.', default=False)
@click.option('--rename-fields/--original-fields-naming', help='Rename fields, removing special characters.', default=False)
@click.option('--from-page', help='Start rendering at this page', type=click.IntRange(1), default=1)
@click.option('--to-page', help='Stop rendering after this page', type=click.IntRange(1))
# The default is spelled out as the flag value itself. With ``default=True``
# the function only works if click substitutes the flag value for it; if the
# literal ``True`` is passed through, the lookup below raises KeyError.
@click.option('--html', 'field_renderer_class', help='Render the page as plain HTML', flag_value='html', default='html')
@click.option('--php', 'field_renderer_class', help='Render the page as PHP code', flag_value='php')
@click.option('--jinja', 'field_renderer_class', help='Render the page as a Jinja template', flag_value='jinja')
def cli(path, output, *, field_renderer_class, **kwargs):
    """
    Command-line entry point: convert the PDF at ``path`` and write the markup to ``output``.

    :param path: Path of the PDF file to convert.
    :param output: Open text stream that receives the generated markup.
    :param field_renderer_class: One of ``'html'``, ``'php'`` or ``'jinja'``;
        selects the renderer class handed to ``make_html``.
    :param kwargs: The remaining command-line options, forwarded to ``make_html`` unchanged.
    :return: The ``output`` stream.
    """
    renderers = {
        'html': FieldRenderer,
        'php': PHPFieldRenderer,
        'jinja': JinjaFieldRenderer,
    }
    kwargs['field_renderer_class'] = renderers[field_renderer_class]
    soup = make_html(path, **kwargs)
    output.write(str(soup))
    return output
|
|
@@ -0,0 +1,225 @@
|
|
|
1
|
+
from html import escape
|
|
2
|
+
from pikepdf.form import _FieldWrapper
|
|
3
|
+
|
|
4
|
+
class FieldRenderer:
    """
    Used to render HTML inputs in the output HTML. Subclass to output the inputs in various different template formats (e.g. Jinja, PHP, etc...).

    This base class renders plain HTML: every template hook
    (``render_value_attr``, ``render_echo_statement``, ...) produces an empty
    string, and ``render_if`` / ``render_html_escape`` return their input
    unchanged.
    """
    # Optional replacement class instantiated by ``make()``; see ``set_render_type()``.
    _renderer_type = None
    # Kind of input to render; selects the ``render_<type>`` method used by ``render()``.
    type: str
    # Value of the ``name`` attribute.
    name: str
    # Value of the ``aria-label`` attribute.
    label: str
    # CSS properties rendered into the ``style`` attribute.
    style: dict
    # The form field being rendered (its ``options`` are read for selects).
    field: "_FieldWrapper"

    def __init__(self, field):
        self.field = field

    @classmethod
    def set_render_type(cls, renderer):
        """
        Register the class that ``make()`` should instantiate instead of ``cls``.

        :param renderer: A subclass of the class this is called on.
        :raises TypeError: If ``renderer`` is not such a subclass.
        """
        if not issubclass(renderer, cls):
            raise TypeError('Renderer type must be a subclass of FieldRenderer')
        cls._renderer_type = renderer

    @classmethod
    def make(cls, type, field):
        """
        Create a renderer for ``field`` that renders an input of the given ``type``.

        The class registered with ``set_render_type()`` is used when there is
        one; otherwise ``cls`` itself.
        """
        renderer = (cls._renderer_type or cls)(field)
        renderer.type = type
        return renderer

    def __str__(self):
        return self.render()

    def render(self):
        """
        Render the input by dispatching on ``self.type`` to the matching ``render_*`` method.

        :raises ValueError: If ``self.type`` is not a known input type.
        """
        known_types = (
            'button', 'checkbox', 'file', 'password', 'radio',
            'select', 'signature', 'text', 'textarea',
        )
        if self.type not in known_types:
            raise ValueError(f'Unknown input type: {self.type}')
        return getattr(self, f'render_{self.type}')()

    def render_style_attr_value(self):
        """
        Render ``self.style`` as the (HTML-escaped) value of a ``style`` attribute.

        :return: ``key:value`` pairs joined by ``;``, or ``None`` when no style is set.
        """
        # ``style`` is only declared on the class, so it may never have been assigned.
        style = getattr(self, 'style', None)
        if style is None:
            return None
        return escape(';'.join(f"{key}:{value}" for key, value in style.items()))

    def render_template_value_variable(self):
        """If this renderer is for a template type, returns the variable name that should contain the value of this field."""
        return ''

    def render_button(self):
        """Render a push button using the properties of this renderer"""
        return ""

    def render_checkbox(self):
        """Render a checkbox using the properties of this renderer"""
        return f"""<input type='checkbox' {self.render_basic_attrs()} {self.render_value_checked_if()}/>"""

    def render_file(self):
        """Render a file input using the properties of this renderer"""
        return ""

    def render_password(self):
        """Render a password input using the properties of this renderer"""
        return f"""<input type='password' {self.render_basic_attrs()} {self.render_value_attr()}/>"""

    def render_radio(self):
        """Render a radio button using the properties of this renderer"""
        return f"""<input type='radio' {self.render_basic_attrs()} {self.render_value_checked_if()}/>"""

    def render_select(self):
        """Render a select element using the properties of this renderer"""
        # Option text comes from the PDF, so it is escaped before being placed in the markup.
        options = ''.join(
            f"<option>{escape(str(opt.display_value))}</option>"
            for opt in self.field.options
        )
        return f"<select {self.render_basic_attrs()}>\n{options}\n</select>"

    def render_signature(self):
        """Render a signature field using the properties of this renderer"""
        return f"""<input type='file' data-real-type='signature' {self.render_basic_attrs()}/>"""

    def render_text(self):
        """Render a text field using the properties of this renderer"""
        return f"""<input type='text' {self.render_basic_attrs()} {self.render_value_attr()}/>"""

    def render_textarea(self):
        """Render a multiline text field using the properties of this renderer"""
        return f"""<textarea {self.render_basic_attrs()}>{self.render_value_content()}</textarea>"""

    def render_basic_attrs(self):
        """Render the basic attributes (name, label and style) to apply to the field, regardless of type."""
        attrs = [
            f"name='{escape(self.name)}'",
            f"aria-label='{escape(self.label)}'",
        ]
        style = self.render_style_attr_value()
        # Leave the attribute out entirely rather than emitting style='None'.
        if style is not None:
            attrs.append(f"style='{style}'")
        return ' '.join(attrs)

    def render_value_attr(self):
        """Render the value attribute of a field, or template code to generate such"""
        return ''

    def render_value_content(self):
        """Render the raw value of a field (e.g. for use in a textarea), or template code to generate such"""
        return ''

    def render_value_checked_if(self):
        """Render the 'checked' attribute for checkboxes or radio buttons, or template code to generate such"""
        return ''

    def render_html_escape(self, value:str):
        """Given a string, which should be a statement in the target template language, add the
        code necessary to escape the results for safe inclusion in HTML source."""
        return value

    def render_echo_statement(self, stmt:str):
        """Given a string, which should be a statement in the target template language, render the
        necessary syntax to output the result of the statement into the rendered HTML."""
        return ''

    def render_echo_statement_if(self, condition:str, stmt:str, *, html_escape=True):
        """Given two strings, which should be statements in the target template language, render the
        necessary syntax to output the result of the second statement into the rendered HTML,
        conditional on the value of the first."""
        return ''

    def render_if(self, condition:str, html:str):
        """Given two strings, the first of which should be a statement in the target template
        language, and the second of which is raw HTML or template code, render the necessary syntax
        to output the raw value conditional on the statement."""
        return html
|
|
138
|
+
|
|
139
|
+
|
|
140
|
+
|
|
141
|
+
class PHPFieldRenderer(FieldRenderer):
    """
    Render the HTML as PHP source code.

    Field values are read from a PHP array named ``$fd``, keyed by field name.
    """
    def render_echo_statement(self, stmt:str):
        """Wrap a PHP expression in a short echo tag."""
        return f"<?={stmt}?>"

    def render_echo_statement_if(self, condition:str, stmt:str, *, html_escape=True):
        """
        Render a PHP ternary that echoes ``stmt`` or an empty string.

        NOTE: the generated code echoes ``stmt`` when ``condition`` is FALSE.
        The callers in this class pass an ``empty(...)`` test as the condition,
        so the statement is echoed when the value is not empty.

        :param condition: PHP expression used as the ternary condition.
        :param stmt: PHP expression to echo.
        :param html_escape: Whether to wrap ``stmt`` with ``render_html_escape`` first.
        """
        if html_escape:
            stmt = self.render_html_escape(stmt)
        return self.render_echo_statement(f"{condition} ? '' : {stmt}")

    def render_if(self, condition:str, html:str):
        """Wrap ``html`` in a PHP ``if`` block using the alternative (colon) syntax."""
        # The closing tag uses the full "<?php" opener: a bare "<?" only works
        # when PHP's short_open_tag setting is enabled.
        return f"<?php if ({condition}):?>{html}<?php endif;?>"

    def render_template_value_variable(self):
        """Return the PHP expression that holds this field's value."""
        # The name ends up inside a single-quoted PHP string, where backslash
        # and single quote are the only characters that need escaping.
        key = self.name.replace('\\', '\\\\').replace("'", "\\'")
        return f"$fd['{key}']"

    def render_value_attr(self):
        """Render PHP that emits an escaped ``value="..."`` attribute when the value is not empty."""
        variable = self.render_template_value_variable()
        return self.render_echo_statement_if(
            f"empty({variable})",
            f"""'value="'.{self.render_html_escape(variable)}.'"'""",
            html_escape=False
        )

    def render_value_content(self):
        """Render PHP that emits the escaped value itself when it is not empty."""
        variable = self.render_template_value_variable()
        return self.render_echo_statement_if(f"empty({variable})", variable)

    def render_value_checked_if(self):
        """Render PHP that emits ``checked`` when the value is not empty."""
        return self.render_echo_statement_if(
            f"empty({self.render_template_value_variable()})",
            "'checked'",
            html_escape=False
        )

    def render_html_escape(self, value:str):
        """Wrap a PHP expression in ``htmlspecialchars()``."""
        return f"htmlspecialchars({value})"
|
|
183
|
+
|
|
184
|
+
|
|
185
|
+
class JinjaFieldRenderer(FieldRenderer):
    """
    Render the HTML as a Jinja template. This does not use advanced Jinja features, so templates
    produced may work in other template engines with a similar syntax, requiring little or no
    modification.

    Field values are read from a template variable named ``fd``, keyed by field name.
    """
    def render_echo_statement(self, stmt:str):
        """Wrap an expression in ``{{ ... }}`` so its result is printed."""
        return f"{{{{{stmt}}}}}"

    def render_echo_statement_if(self, condition:str, stmt:str, *, html_escape=True):
        """
        Print the result of ``stmt`` only when ``condition`` is truthy.

        :param condition: Template expression deciding whether anything is printed.
        :param stmt: Template expression whose result is printed.
        :param html_escape: Apply :meth:`render_html_escape` to ``stmt`` first.
        """
        if html_escape:
            stmt = self.render_html_escape(stmt)
        return self.render_if(condition, self.render_echo_statement(stmt))

    def render_if(self, condition:str, html:str):
        """Wrap ``html`` in an ``{% if %}`` ... ``{% endif %}`` block."""
        return f"""{{% if {condition} %}}{html}{{% endif %}}"""

    def render_template_value_variable(self):
        """Return the template expression that reads this field's value from ``fd``."""
        # Escape backslashes and single quotes so a field name containing them (for
        # example "Applicant's name") does not terminate the string literal early.
        key = str(self.name).replace("\\", "\\\\").replace("'", "\\'")
        return f"""fd['{key}']"""

    def render_value_attr(self):
        """Emit a ``value='...'`` attribute (escaped) when the field has a truthy value."""
        value = self.render_echo_statement(self.render_html_escape(self.render_template_value_variable()))
        return self.render_if(
            self.render_template_value_variable(),
            f"value='{value}'"
        )

    def render_value_content(self):
        """Emit the escaped field value when the field has a truthy value."""
        return self.render_echo_statement_if(
            self.render_template_value_variable(),
            self.render_template_value_variable()
        )

    def render_value_checked_if(self):
        """Emit the ``checked`` attribute when the field has a truthy value."""
        return self.render_if(
            self.render_template_value_variable(),
            'checked'
        )

    def render_html_escape(self, value:str):
        """Pipe an expression through the ``e`` (escape) filter."""
        return f"{value} | e"
|
|
@@ -0,0 +1,112 @@
|
|
|
1
|
+
import os
|
|
2
|
+
from subprocess import run
|
|
3
|
+
from .template_soup import TemplateSoup
|
|
4
|
+
from bs4 import Tag, BeautifulSoup
|
|
5
|
+
import tempfile
|
|
6
|
+
from .process_form import add_form_fields
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
from pikepdf import Pdf
|
|
9
|
+
from pikepdf.form import Form
|
|
10
|
+
import re
|
|
11
|
+
from base64 import urlsafe_b64decode
|
|
12
|
+
from io import StringIO
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def make_html(path:str|Path, *, pdf2html:str='pdf2htmlex', zoom:int|float=1, from_page:int|None=None, to_page:int|None=None, **process_form_args):
    """
    Convert a PDF form into an HTML document with matching form inputs.

    Runs the external ``pdf2html`` executable to render the pages, strips the viewer UI
    from its output, replaces base64 SVG ``<img>`` backgrounds with inline ``<svg>``
    elements, and finally overlays form inputs via ``add_form_fields``.

    :param path: Path of the PDF file to convert.
    :param pdf2html: Name or path of the converter executable.
    :param zoom: Zoom factor given to the converter and used to scale input positions.
    :param from_page: First page to convert (``--first-page``); omitted when None.
    :param to_page: Last page to convert (``--last-page``); omitted when None.
    :param process_form_args: Extra keyword arguments forwarded to ``add_form_fields``.
    :returns: The resulting ``TemplateSoup`` document.
    :raises subprocess.CalledProcessError: If the converter exits with a non-zero status.
    """
    pdf2html_options = [
        '--zoom', str(zoom),
        '--no-drm', '1',
        '--printing', '0',
        '--bg-format', 'svg',
    ]
    if from_page is not None:
        pdf2html_options += ['--first-page', str(from_page)]
    if to_page is not None:
        pdf2html_options += ['--last-page', str(to_page)]

    # A private temporary directory replaces tempfile.mktemp(), which only returns a name
    # (leaving a window for another process to claim it) and left the output file behind.
    with tempfile.TemporaryDirectory() as work_dir:
        output_path = os.path.join(work_dir, 'output.html')

        # Run pdf2htmlex to get the initial base HTML.
        # NOTE(review): the output is passed as a relative path, presumably because the
        # converter resolves it against its destination directory -- confirm.
        run([
            pdf2html,
            *pdf2html_options,
            path,
            os.path.relpath(output_path)
        ], check=True)

        # Assumes the converter writes UTF-8; decode explicitly rather than relying on
        # the platform's default encoding.
        with open(output_path, 'r', encoding='utf-8') as file:
            soup = TemplateSoup(file, 'lxml')

    # Remove all the extra stuff we don't need
    for script in soup.find_all('script'):
        script.decompose()
    for el in soup.find_all(id='sidebar'):
        el.decompose()
    for el in soup.find_all(class_='loading-indicator'):
        el.decompose()
    for el in soup.find_all(class_='pi'):
        el.decompose()
    for el in soup.find_all('img'):
        unwrap_svg_img(el)
    for el in soup.find_all('style'):
        css = el.string
        if css is None:
            # Empty (or multi-node) <style>: nothing to inspect
            continue
        if '* Fancy styles for pdf2htmlEX' in css:
            el.decompose()
        elif '* Base CSS for pdf2htmlEX' in css:
            # This one also has a lot of junk we don't need, but some stuff we do.
            # All the UI-related stuff right after the header
            css = re.sub(r'(?<=\*/).*?(?=\.pf\{)', '', css)
            # Selection, page info (.pi), css drawings (.d), text input (.it), radio input (.ir)
            css = re.sub(r'::(-moz-)?selection\{background:rgba\(127,255,255,0\.4\)\}.*', '', css)
            el.string = css

    # Copy any new styles we've created.
    # NOTE: svg_path_styles is module-level and never cleared, so it also holds entries
    # collected by earlier calls in the same process.
    new_styles = soup.new_tag('style')
    new_styles.string = ''.join(
        f'.{css_class}{{{style}}}\n'
        for style, css_class in svg_path_styles.items()
    )
    soup.head.append(new_styles)

    # Add our own stuff direct from the PDF
    with Pdf.open(path) as pdf:
        form = Form(pdf)
        add_form_fields(soup, pdf, form,
            zoom=zoom,
            # add_form_fields compares page numbers against start_page, so None must
            # become the first page instead of being passed through.
            start_page=1 if from_page is None else from_page,
            **process_form_args
        )

    return soup
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
# Number of CSS classes handed out so far; used to build the next "svpN" class name.
path_style_counter = 0
# Maps an inline SVG path style string to the CSS class that replaces it.
svg_path_styles = {}
def unwrap_svg_img(img_el:Tag):
    """
    Replace an ``<img>`` holding a base64 SVG data URL with the inline ``<svg>`` element.

    Inline ``style`` attributes on the SVG's ``<path>`` elements are swapped for generated
    CSS classes, which are recorded in the module-level ``svg_path_styles`` mapping so
    that identical styles share one class. Images that are not base64 SVG data URLs, or
    whose payload contains no ``<svg>`` root, are left untouched.

    :param img_el: The ``<img>`` tag to replace in place.
    """
    global path_style_counter
    prefix = 'data:image/svg+xml;base64,'
    data_url = img_el.get('src', '')
    if not data_url.startswith(prefix):
        return
    svg = BeautifulSoup(urlsafe_b64decode(data_url.removeprefix(prefix)), 'xml').svg
    if svg is None:
        # Payload decoded but is not an SVG document; keep the original <img>
        return
    del svg['xmlns']
    del svg['xmlns:xlink']
    if img_el.has_attr('class'):
        svg['class'] = img_el['class']
    for path in svg.find_all('path'):
        style = path.get('style')
        if style is None:
            # Path without an inline style: nothing to convert
            continue
        css_class = svg_path_styles.get(style)
        if css_class is None:
            css_class = f"svp{path_style_counter}"
            path_style_counter += 1
            svg_path_styles[style] = css_class
        del path['style']
        path['class'] = css_class
    img_el.replace_with(svg)
|
|
@@ -0,0 +1,164 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
from .template_soup import TemplateSoup
|
|
3
|
+
from pikepdf import Pdf, Annotation
|
|
4
|
+
from pikepdf.form import Form, TextField, CheckboxField, RadioButtonGroup, ChoiceField, SignatureField
|
|
5
|
+
from .field_renderer import FieldRenderer
|
|
6
|
+
from typing import Type
|
|
7
|
+
from functools import cmp_to_key
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def add_form_fields(soup: TemplateSoup, pdf:Pdf, form: Form, zoom: int|float = 1, rename_fields = {}, field_labels = {}, sort_widgets=False, start_page:int=1, field_renderer_class:Type[FieldRenderer]=FieldRenderer):
    """
    Overlay an HTML input on each converted page for every form widget in the PDF.

    The ``page-container`` element is wrapped in a ``<form>``; each page that has widgets
    receives a ``div.form-inputs`` holding one placeholder per widget, absolutely
    positioned from the widget rectangle. A ``<style>`` block for the inputs is appended
    to the document head.

    :param soup: Document to modify in place.
    :param pdf: The opened PDF whose pages are walked.
    :param form: Form accessor for ``pdf``.
    :param zoom: Factor applied to widget coordinates and sizes.
    :param rename_fields: A mapping of PDF field names to desired HTML field names. May
        also be a callable taking ``(name, field)``, or True to sanitise names automatically.
    :param field_labels: A mapping of PDF field names to human-readable labels.
    :param sort_widgets: Attempt to sort widgets according to their visual placement on the page.
        This can be useful for PDF forms where the tab order is illogical, though some manual
        refinement may still be needed afterward for a truly logical tab order. May also be a
        callable that receives the widgets of a page and returns them in the desired order.
    :param start_page: 1-based number of the PDF page matching the first HTML page.
        None is treated as 1.
    :param field_renderer_class: Renderer class whose ``make`` builds each input.
    :returns: A dict. NOTE: nothing is currently stored in it, so it is always empty.
    """
    # NOTE: the dict defaults above are shared between calls; they are only read here.
    if start_page is None:
        start_page = 1

    html_form = soup.find(id='page-container').wrap(soup.new_tag('form'))
    html_pages = html_form.find_all(class_='pf')
    rendered_fields = {}
    for page_no, pdf_page in enumerate(pdf.pages, 1):
        if page_no < start_page: continue
        page_index = page_no - start_page
        if page_index >= len(html_pages):
            # The HTML covers fewer pages than the PDF (e.g. the conversion stopped at
            # an earlier last page); there is nothing left to attach inputs to.
            break
        widgets = form.get_widget_annotations_for_page(pdf_page)
        if not widgets: continue
        html_page = html_pages[page_index]
        fieldset = soup.new_tag('div', attrs={'class':'form-inputs'})
        if callable(sort_widgets):
            widgets = sort_widgets(widgets)
        elif sort_widgets is True:
            widgets = sorted(widgets, key=cmp_to_key(_cmp_widgets))
        for widget in widgets:
            widget: Annotation
            field = form.get_field_for_annotation(widget)
            if field.is_radio_button:
                field = RadioButtonGroup(form, field)
                renderer = field_renderer_class.make('radio', field)
            elif field.is_checkbox:
                field = CheckboxField(form, field)
                renderer = field_renderer_class.make('checkbox', field)
            elif field.is_pushbutton:
                renderer = field_renderer_class.make('button', field)
            elif field.is_text:
                field = TextField(form, field)
                if field.is_multiline:
                    renderer = field_renderer_class.make('textarea', field)
                elif field.is_password:
                    renderer = field_renderer_class.make('password', field)
                elif field.is_file_select:
                    # Previously tested is_password a second time, so this branch
                    # could never be reached.
                    renderer = field_renderer_class.make('file', field)
                else:
                    renderer = field_renderer_class.make('text', field)
            elif field.is_choice:
                field = ChoiceField(form, field)
                renderer = field_renderer_class.make('select', field)
            elif field.field_type == "/Sig":
                field = SignatureField(form, field)
                renderer = field_renderer_class.make('signature', field)
            else:
                # Unknown field type: no input is produced
                continue
            name = field.fully_qualified_name
            if callable(rename_fields):
                renderer.name = rename_fields(name, field)
            elif isinstance(rename_fields, dict) and name in rename_fields:
                renderer.name = rename_fields[name]
            elif rename_fields is True:
                renderer.name = _auto_rename(name)
            else:
                renderer.name = name
            if name in field_labels:
                renderer.label = field_labels[name]
            else:
                renderer.label = field.alternate_name
            # The PDF format considers the bottom-left corner to be the origin, so we
            # place the input using its `left` and `bottom` offsets.
            scale = zoom
            renderer.style = {
                'position': 'absolute',
                'left': f'{widget.rect.llx*scale}px',
                'bottom': f'{widget.rect.lly*scale}px',
                'width': f'{widget.rect.width*scale}px',
                'height': f'{widget.rect.height*scale}px',
            }
            fieldset.append(soup.make_placeholder(value=renderer))
        html_page.append(fieldset)
    style = soup.new_tag('style')
    style.append("""
        .form-inputs{
            bottom: 0;
            left: 0;
            position: absolute;
        }
        .form-inputs input,
        .form-inputs textarea,
        .form-inputs select{
            border: none;
            background: rgba(0,0,0,.05);
            resize: none;
            appearance: none;
            margin: 0
        }
        .form-inputs input:hover,
        .form-inputs textarea:hover,
        .form-inputs select:hover{
            box-shadow: inset 0 0 5px 5px rgba(0, 0, 0, .1);
        }
        .form-inputs input:checked::after{
            display: block;
            content: '\\2714';
            width: 100%;
            height: 100%;
            text-align: center;
        }
    """)
    soup.find('head').append(style)
    return rendered_fields
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
# Compiled lazily on first use by _auto_rename.
_rename_re = None
def _auto_rename(name:str):
    """
    Turn a PDF field name into an identifier-like HTML field name.

    Every run of characters other than ASCII letters and digits becomes a single
    underscore, and leading/trailing underscores are stripped. A result starting with a
    digit is prefixed with an underscore. If nothing is left after sanitising, ``'_'``
    is returned instead of failing.

    :param name: The original (fully qualified) field name.
    :returns: The sanitised name.
    """
    global _rename_re
    if _rename_re is None:
        import re
        _rename_re = re.compile('[^A-Za-z0-9]+')
    name = _rename_re.sub('_', name).strip('_')
    if not name:
        # Name was empty or consisted only of replaced characters; indexing name[0]
        # below would raise IndexError.
        return '_'
    if name[0].isdigit():
        return f"_{name}"
    return name
|
|
130
|
+
|
|
131
|
+
|
|
132
|
+
def _cmp_widgets(wig1: Annotation, wig2: Annotation):
    """
    Comparison function ordering two widgets by visual position: top to bottom, then
    left to right. Coordinates follow the PDF convention of a bottom-left origin.

    :returns: -1 if ``wig1`` comes first, 1 if ``wig2`` comes first, 0 if both share the
        same upper-left corner.
    """
    a = wig1.rect
    b = wig2.rect
    FIRST_LEADS, SECOND_LEADS = -1, 1

    # No vertical overlap: whichever sits entirely above the other comes first
    if a.lly >= b.ury:
        return FIRST_LEADS
    if a.ury <= b.lly:
        return SECOND_LEADS

    # Same row (or at least overlapping vertically): whichever sits entirely to the
    # left of the other comes first
    if a.urx <= b.llx:
        return FIRST_LEADS
    if a.llx >= b.urx:
        return SECOND_LEADS

    # Overlapping in both directions: fall back to the top-left corner, higher top edge
    # first, then the left-most edge
    if a.ury > b.ury or a.ury < b.ury:
        return FIRST_LEADS if a.ury > b.ury else SECOND_LEADS
    if a.llx > b.llx or a.llx < b.llx:
        return SECOND_LEADS if a.llx > b.llx else FIRST_LEADS

    # Identical upper-left corner; no preference
    return 0
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
from bs4 import BeautifulSoup
|
|
2
|
+
from bs4.element import PreformattedString
|
|
3
|
+
from string import Template
|
|
4
|
+
from secrets import token_hex
|
|
5
|
+
|
|
6
|
+
class TemplateSoup(BeautifulSoup):
    """
    A BeautifulSoup document that can carry placeholders.

    Placeholders are serialized as ``${identifier}`` markers; when the document is
    converted with ``str()`` or ``prettify()`` those markers are replaced, via
    ``string.Template.safe_substitute``, with the string form of each placeholder's
    current value.
    """
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Placeholder objects keyed by the name they were created under
        self.template = {}
        # Substitution identifier -> proxy that stringifies the placeholder value on demand
        self._substitutions = {}

    def __str__(self):
        markup = super().__str__()
        return Template(markup).safe_substitute(self._substitutions)

    def prettify(self, *args, **kwargs):
        markup = super().prettify(*args, **kwargs)
        return Template(markup).safe_substitute(self._substitutions)

    def make_placeholder(self, name:str|None = None, value=None)->'Placeholder':
        """
        Create and register a new placeholder node.

        :param name: Key under which the placeholder is stored in ``self.template``.
            When omitted, a random identifier is generated and used as the key.
        :param value: Initial substitution value; left unset when None.
        :returns: The new ``Placeholder``, ready to be inserted into the tree.
        """
        if name is not None:
            # A random suffix is appended to the caller's name for the substitution identifier
            substitution_name = name+token_hex(8)
        else:
            substitution_name = 'p'+token_hex(16)
            name = substitution_name
        placeholder = Placeholder(substitution_name)
        if value is not None:
            placeholder.substitution_value = value
        self.template[name] = placeholder
        self._substitutions[substitution_name] = placeholder.substitution_string_proxy
        return placeholder
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
class Placeholder(PreformattedString):
    """
    A string node whose content is a substitution identifier and which is written out
    between the ``${`` prefix and ``}`` suffix defined below.
    """
    PREFIX: str = "${"
    SUFFIX: str = "}"

    # Object whose str() form stands in for the placeholder; None until assigned
    substitution_value = None

    @property
    def substitution_string_proxy(self):
        """A proxy object that resolves to ``substitution_string`` when passed to ``str()``."""
        proxy = _SubstitutionStringProxy(self)
        return proxy

    @property
    def substitution_string(self):
        """The current ``substitution_value`` converted with ``str()``."""
        current = self.substitution_value
        return str(current)
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
class _SubstitutionStringProxy:
    """
    Defers reading a placeholder's substitution string until ``str()`` is called on the
    proxy, so the value in effect at that moment is the one used.
    """
    def __init__(self, placeholder:Placeholder):
        self.placeholder = placeholder

    def __str__(self):
        target = self.placeholder
        return target.substitution_string
|