ppapp 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,368 @@
1
+ \documentclass[screen,nonacm]{acmart}
2
+
3
+ %---------------------------------------------------------------------------
4
+ \usepackage{mathtools}
5
+ \usepackage{listings}
6
+ \usepackage{adjustbox}
7
+
8
+ \lstdefinestyle{mystyle}{
9
+ basicstyle=\ttfamily\footnotesize,
10
+ breakatwhitespace=false,
11
+ breaklines=true,
12
+ captionpos=b,
13
+ columns=fullflexible,
14
+ keepspaces=true,
15
+ showspaces=false,
16
+ showstringspaces=false,
17
+ showtabs=false,
18
+ tabsize=2
19
+ }
20
+ \lstset{style=mystyle}
21
+
22
+ \newfloat{program}{!ht}{}
23
+ \floatname{program}{Program}
24
+
25
+ \DeclareMathOperator{\IM}{Im}
26
+ \DeclareMathOperator{\erfi}{erfi}
27
+
28
+ % Define a visible unnumbered subsubsection
29
+ \newcommand{\modeheader}[1]{\par\vspace{6pt plus 12pt minus 3pt}\pagebreak[1]\noindent\textbf{#1}\nopagebreak[4]}
30
+
31
+ %---------------------------------------------------------------------------
32
+
33
+ \begin{document}
34
+
35
+ \title[User Manual: Python code for generating C code for piecewise Chebyshev approximation]
36
+ {User Manual:\\ Python code for generating C code for piecewise Chebyshev approximation}
37
+
38
+ \author{Joachim Wuttke}
39
+ \email{j.wuttke@fz-juelich.de}
40
+ \orcid{0000-0002-4028-1447}
41
+ \affiliation{%
42
+ \institution{Forschungszentrum Jülich GmbH}
43
+ \city{Jülich Centre for Neutron Science at MLZ, Lichtenbergstraße 1, 85748 Garching}
44
+ \country{Germany}}
45
+
46
+ \author{Alexander Kleinsorge}
47
+ \email{alkl9873@th-wildau.de}
48
+ \affiliation{%
49
+ \institution{Technische Hochschule Wildau}
50
+ \city{Studiengang Telematik, Hochschulring 1, 15745 Wildau}
51
+ \country{Germany}}
52
+
53
+ \begin{abstract}
54
+ This user guide documents Python and C software that implements the algorithms
55
+ described in the article
56
+ ``Algorithm 1xxx: Code generation for piecewise Chebyshev approximation''.
57
+ \end{abstract}
58
+
59
+ \maketitle
60
+
61
+ %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
62
+ \section{Introduction}
63
+ %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
64
+
65
+ This open-source Python package \textit{ppapp}
66
+ (\textit{p}iecewise \textit{p}olynomial \textit{app}roximation)
67
+ implements the algorithms described in the article
68
+ ``Algorithm 1xxx: Code generation for piecewise Chebyshev approximation'' \cite{WuKl2x}.
69
+ The software is released under the GNU General Public License, version 3 or later;
70
+ other licensing is negotiable.
71
+
72
+ The package is available in two forms:
73
+ \begin{itemize}
74
+ \item The \textbf{PyPI package} \texttt{ppapp}, installable via \texttt{pip install ppapp},
75
+ contains the Python code generator and demo functions.
76
+ \item The \textbf{project repository} at \url{https://jugit.fz-juelich.de/mlz/ppapp}
77
+ additionally contains the original C++ implementation,
78
+ user manual source, C demonstration code, and development history.
79
+ Paths in Section~\ref{Sdem} referring to \texttt{demo/} are relative to this repository root.
80
+ \end{itemize}
81
+ The software may be further improved if new ideas arise;
82
+ in particular, it shall be extended to tile-wise Taylor approximation in the complex plane \cite{Wut:cgt}.
83
+
84
+ Section~\ref{Sgen} describes the code generator that produces C source files
85
+ containing tables of polynomial coefficients that approximate a specific function~$f$.
86
+ Section~\ref{Sdem} describes how the generated C code is used.
87
+
88
+ %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
89
+ \section{Code generator}\label{Sgen}
90
+ %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
91
+
92
+ %===================================================================================================
93
+ \subsection{Install and run}\label{SgenInstall}
94
+ %===================================================================================================
95
+
96
+ The generator code is written in the programming language Python~3.
97
+ It depends on the arbitrary-precision library \textit{Arb}
98
+ that is part of \textit{FLINT}, Fast Library for Number Theory \cite{s:flint}.
99
+ A Python wrapper of FLINT is available from \url{https://pypi.org}
100
+ as package \textit{python-flint}.
101
+
102
+ The software can be installed from PyPI with:
103
+ \begin{lstlisting}
104
+ pip install ppapp
105
+ \end{lstlisting}
106
+ This automatically installs the \textit{python-flint} dependency
107
+ and provides the command:
108
+ \begin{lstlisting}
109
+ ppapp
110
+ \end{lstlisting}
111
+
112
+ Alternatively, when working with the source repository without installing,
113
+ run from the directory containing the \texttt{ppapp} package:
114
+ \begin{lstlisting}
115
+ cd /path/to/ppapp/py/R
116
+ python -m ppapp <mode> <arguments>
117
+ \end{lstlisting}
118
+
119
+ Running \texttt{ppapp} without arguments prints a summary of available modes:
120
+ \begin{lstlisting}
121
+ No mode given
122
+
123
+ Usage:
124
+
125
+ ppapp i <f_module> - run initial tests from my_testcases
126
+ ppapp v <f_module> <x> - print function value f(x)
127
+ ppapp n <f_module> <M> <Nmax> <E> - print N_min(M',E), up to given Nmax
128
+ ppapp e <f_module> <M> <N> - print maximum relative error, in units of epsilon
129
+ ppapp c <f_module> <M> <N> [<E>] - print plain table of Chebyshev coefficients c_n
130
+ ppapp p <f_module> <M> <N> [<E>] - print plain table of economized coefficients p_m
131
+ ppapp s <f_module> <M> <N> [<E>] - print C source defining economized coefficients p_m
132
+ ppapp t <f_module> <M> <Nxo> <E> - print C source defining test cases
133
+
134
+ where
135
+
136
+ <f_module> - path to function definition file (e.g., 'mydir/f_imwofx.py')
137
+ <M> - integer M >= 0 specifies 2^M subdomains per octave
138
+ <N> - integer N >= 1 is the polynomial degree
139
+ <E> - double E > 0 is the maximum relative error, in units of epsilon=2^-53
140
+ <Nxo> - number of extra (non-Chebyshev) octaves on each side of the Chebyshev range
141
+ \end{lstlisting}
142
+
143
+ %===================================================================================================
144
+ \subsection{Function argument}\label{SgenFunc}
145
+ %===================================================================================================
146
+
147
+ All commands require a function definition file as the second argument:
148
+ \begin{lstlisting}
149
+ ppapp v /path/to/my_function.py 1.0
150
+ \end{lstlisting}
151
+
152
+ The function definition file specifies the interface between the generic approximation machinery
153
+ and the specific target function that is to be approximated.
154
+ It defines one function and two global objects:
155
+ \begin{lstlisting}[language=Python]
156
+ def my_arb_f(X: arb, prec: int) -> arb:
157
+ """Evaluates f(x) with given precision"""
158
+ ...
159
+
160
+ my_domain: Tuple[float, float] = (a, b)
161
+ my_testcases: List[Tuple[float, float, float]] = [
162
+ (x, f_expected, tolerance),
163
+ ...
164
+ ]
165
+ \end{lstlisting}
166
+ Function \texttt{my\_arb\_f} computes $f(x)$ in interval arithmetic
167
+ with a precision of \texttt{prec} binary digits,
168
+ using the python-flint wrapper for Arb \cite{s:flint}.
169
+ The tuple \texttt{my\_domain} contains the limits of the total domain $[a,b)$.
170
+ The entries in the list \texttt{my\_testcases} are triples $(x, f_\text{expected}(x), \text{tol})$.
171
+ The test suite will fail unless for each test case,
172
+ the function value $f(x)$, computed by our arbitrary-precision function \texttt{my\_arb\_f},
173
+ agrees with $f_\text{expected}(x)$ with a relative error not larger than $\text{tol}$.
174
+
175
+ As an example, the package includes \texttt{ppapp/demo\_functions/imwofx.py}
176
+ that implements the function
177
+ \begin{align}
178
+ f(x)\coloneqq \exp(-x^2)\erfi(x) \equiv \IM\,w(x)
179
+ \end{align}
180
+ introduced in \cite[Sect~1.3]{WuKl2x}.
181
+ The arbitrary-precision computation of~$f$ is straightforward because Arb supports $\erfi(x)$
182
+ as built-in method \texttt{arb.erfi()}.
183
+ The domain is $[a,b)=[0.5,12)$.
184
+ The test cases allow a tolerance of $10^{-5}$,
185
+ i.e.\ they are meant to ensure the basic correctness of the high-precision implementation
186
+ but not its accuracy.
187
+ The latter is not a concern because of the intrinsic accuracy control of Arb.
188
+ To support any other function~$f$,
189
+ one needs to write a new implementation file,
190
+ based on the model provided by~\texttt{ppapp/demo\_functions/imwofx.py}.
191
+
192
+ A second example, \texttt{ppapp/demo\_functions/polynomial.py}, implements the simple polynomial
193
+ \begin{align}
194
+ f(x) \coloneqq x^3 - x^2 + x - 1
195
+ \end{align}
196
+ over the domain $[1.5, 4)$.
197
+ This serves as a useful test case because the Chebyshev approximation of an exact
198
+ degree-3 polynomial should yield only four significant coefficients ($p_0$ through $p_3$),
199
+ with all higher-order coefficients being negligible (at the level of numerical noise).
200
+
201
+ %===================================================================================================
202
+ \subsection{Run modes}\label{SgenRun}
203
+ %===================================================================================================
204
+
205
+ The program \texttt{ppapp} operates in different modes that are selected by a letter
206
+ provided as first command-line argument.
207
+ All output is written to \texttt{stdout};
208
+ use redirection to save it in a file.
209
+
210
+ \modeheader{Initial test mode.}
211
+ \begin{lstlisting}
212
+ ppapp i <f_module>
213
+ \end{lstlisting}
214
+ Tests the function implementation against the test cases defined in \texttt{my\_testcases}.
215
+ This verifies that the high-precision reference function is basically correct
216
+ before using it for coefficient generation.
217
+
218
+ \modeheader{Function value mode.}
219
+ \begin{lstlisting}
220
+ ppapp v <f_module> <x>
221
+ \end{lstlisting}
222
+ Computes a single function value $f(x)$.
223
+
224
+ \modeheader{Minimal degree mode.}
225
+ \begin{lstlisting}
226
+ ppapp n <f_module> <M> <Nmax> <relerr>
227
+ \end{lstlisting}
228
+ Computes the minimal polynomial degree for which the relative error,
229
+ in units of $\epsilon$, is not larger than the given \texttt{relerr}.
230
+ This mode has been used to produce Table~1 in \cite{WuKl2x}.
231
+
232
+ \modeheader{Error bound mode.}
233
+ \begin{lstlisting}
234
+ ppapp e <f_module> <M> <N>
235
+ \end{lstlisting}
236
+ Prints an upper bound for the total relative error in units of~$\epsilon$.
237
+ Based on results from modes \texttt{n} and \texttt{e},
238
+ make your choice of $M$ and $N$,
239
+ as discussed in \cite[Sect~4.4]{WuKl2x}.
240
+
241
+ \modeheader{Coefficient table modes.}
242
+ \begin{lstlisting}
243
+ ppapp c <f_module> <M> <N> [<relerr>]
244
+ ppapp p <f_module> <M> <N> [<relerr>]
245
+ \end{lstlisting}
246
+ Prints a table
247
+ of Chebyshev coefficients $c_{ln}$ (mode \texttt{c})
248
+ or of economized polynomial coefficients $p_{ln}$ (mode \texttt{p}).
249
+ These modes have been used to produce Fig~3 of~\cite{WuKl2x}.
250
+ The optional \texttt{relerr} argument, in units of~$\epsilon$,
251
+ activates tests that ensure that the relative error never exceeds this bound.
252
+
253
+ \modeheader{C source code mode.}
254
+ \begin{lstlisting}
255
+ ppapp s <f_module> <M> <N> [<relerr>]
256
+ \end{lstlisting}
257
+ Prints C source code with arrays that hold the $p_{ln}$.
258
+
259
+ \modeheader{Test case mode.}
260
+ \begin{lstlisting}
261
+ ppapp t <f_module> <M> <Nxo> <relerr>
262
+ \end{lstlisting}
263
+ Prints C source code with test cases, covering a range $a 2^{-N_\text{xo}} \ldots b 2^{N_\text{xo}}$
264
+ that extends beyond the Chebyshev domain if $N_\text{xo}>0$.
265
+
266
+ %===================================================================================================
267
+ \subsection{Hexadecimal output}
268
+ %===================================================================================================
269
+
270
+ The output files are self-explanatory thanks to initial comment lines.
271
+ Let us explain just one detail:
272
+ In the auto-generated C source files,
273
+ floating-point numbers are written in hexadecimal format, like
274
+ \begin{lstlisting}
275
+ 0x1.ef90904c7eeeep-2, 0x1.461380c17af85p-8, -0x1.9d5e2f887fe99p-15, -0x1.35c476fb1ab4ap-24, ...
276
+ \end{lstlisting}
277
+ Note that the letter \texttt{p} is followed by the base-2 exponent in decimal notation,
278
+ i.e.\ \texttt{0x1.8p-13} is $1.5\cdot2^{-13}$.
279
+
280
+ %===================================================================================================
281
+ \subsection{Unit tests}\label{SGenTests}
282
+ %===================================================================================================
283
+
284
+ The PyPI package includes a comprehensive test suite.
285
+ When working with the repository, the tests can be run with:
286
+ \begin{lstlisting}
287
+ python3 -m pytest tests/ -v
288
+ \end{lstlisting}
289
+
290
+ The test suite covers
291
+ mathematical helper functions,
292
+ power law analysis,
293
+ error bounds,
294
+ subdomain computation,
295
+ output formatting,
296
+ function modules,
297
+ Chebyshev coefficient computation,
298
+ polynomial approximation accuracy,
299
+ and full pipeline integration.
300
+
301
+ %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
302
+ \section{Usage demonstrator}\label{Sdem}
303
+ %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
304
+
305
+ The C source code generated by \texttt{ppapp} can be used in C or C++ projects
306
+ to compute function values~$f(x)$.
307
+ In a typical application, this code would be integrated with other code that
308
+ evaluates~$f$ outside the intermediate domain considered here,
309
+ using expansions for small and large~$x$.
310
+
311
+ A C demonstration implementation is available in directory \texttt{demo/R/outcome}
312
+ of the project repository (not included in the PyPI package).
313
+ It shows how to use the auto-generated coefficients for efficient function evaluation.
314
+
315
+ The target algorithm for evaluating the piecewise polynomial approximation
316
+ is also illustrated in the Python module \texttt{ppapp.target\_algorithm}
317
+ (included in the PyPI package).
318
+ This plain Python implementation is for illustration purposes only.
319
+ The \textit{ppapp} project is designed to generate C code for optimized high-throughput computation.
320
+ For production use from Python, the evaluation code should be implemented
321
+ as a C extension module or at least use NumPy for vectorized operations.
322
+ The Python script should not be used as a template for production code.
323
+
324
+ %===================================================================================================
325
+ \subsection{Alignment specifier}
326
+ %===================================================================================================
327
+
328
+ In order to minimize the number of cache loads,
329
+ the auto-generated arrays that hold tables of polynomial coefficients
330
+ must start at the beginning of a 64-byte memory block \cite[Sect~2.3]{WuKl2x}.
331
+ This is achieved by defining these arrays as
332
+ \begin{lstlisting}[language=C]
333
+ alignas(64) static const double ppapp_Coeffs0[...] = { ... };
334
+ \end{lstlisting}
335
+ The specifier \texttt{alignas} is defined in the language standards C23 and C++11.
336
+ For older versions of C it is not in the standard,
337
+ but may be supported as a compiler extension.
338
+ C11 and C17 have the specifier \texttt{\_Alignas} and provide \texttt{alignas} as a macro in the header \texttt{stdalign.h}.
339
+ For even older language variants one would depend on compiler-specific attributes.
340
+
341
+ The Python code generator automatically inserts the correct alignment directives
342
+ and pads coefficient tables with zeros when necessary to maintain proper alignment
343
+ \cite[Sect~2.4]{WuKl2x}.
344
+
345
+ In the project repository, the C demonstration executable is built and run with the commands
346
+ \begin{lstlisting}
347
+ cd demo/R/outcome
348
+ mkdir build
349
+ cd build
350
+ cmake ..
351
+ make
352
+ ./statistics g <n>
353
+ ./statistics h <n>
354
+ \end{lstlisting}
355
+ In mode \texttt{g}, the relative deviation between the polynomial approximation
356
+ and the reference code is computed for $n$ values of $x$,
357
+ regularly spaced on a logarithmic scale.
358
+ In mode \texttt{h}, $n$ randomly drawn values of~$x$ are used to accumulate a histogram
359
+ of absolute values of the relative deviation.
360
+
361
+ %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
362
+ %\section*{Acknowledgement}
363
+ %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
364
+
365
+ \bibliographystyle{switch}
366
+ \bibliography{jw8}
367
+
368
+ \end{document}