koffi 1.3.2 → 1.3.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CMakeLists.txt +27 -3
- package/ChangeLog.md +46 -14
- package/build/qemu/1.3.5/koffi_darwin_arm64.tar.gz +0 -0
- package/build/qemu/1.3.5/koffi_darwin_x64.tar.gz +0 -0
- package/build/qemu/1.3.5/koffi_freebsd_arm64.tar.gz +0 -0
- package/build/qemu/1.3.5/koffi_freebsd_ia32.tar.gz +0 -0
- package/build/qemu/1.3.5/koffi_freebsd_x64.tar.gz +0 -0
- package/build/qemu/1.3.5/koffi_linux_arm32hf.tar.gz +0 -0
- package/build/qemu/1.3.5/koffi_linux_arm64.tar.gz +0 -0
- package/build/qemu/1.3.5/koffi_linux_ia32.tar.gz +0 -0
- package/build/qemu/1.3.5/koffi_linux_riscv64hf64.tar.gz +0 -0
- package/build/qemu/1.3.5/koffi_linux_x64.tar.gz +0 -0
- package/build/qemu/1.3.5/koffi_openbsd_ia32.tar.gz +0 -0
- package/build/qemu/1.3.5/koffi_openbsd_x64.tar.gz +0 -0
- package/build/qemu/1.3.5/koffi_win32_arm64.tar.gz +0 -0
- package/build/qemu/1.3.5/koffi_win32_ia32.tar.gz +0 -0
- package/build/qemu/1.3.5/koffi_win32_x64.tar.gz +0 -0
- package/doc/_static/perf_linux_20220623.png +0 -0
- package/doc/_static/perf_linux_20220623_2.png +0 -0
- package/doc/_static/perf_windows_20220623.png +0 -0
- package/doc/_static/perf_windows_20220623_2.png +0 -0
- package/doc/benchmarks.md +40 -36
- package/doc/benchmarks.xlsx +0 -0
- package/doc/changes.md +2 -0
- package/doc/conf.py +10 -3
- package/doc/contribute.md +16 -0
- package/doc/dist/doctrees/benchmarks.doctree +0 -0
- package/doc/dist/doctrees/changes.doctree +0 -0
- package/doc/dist/doctrees/contribute.doctree +0 -0
- package/doc/dist/doctrees/environment.pickle +0 -0
- package/doc/dist/doctrees/functions.doctree +0 -0
- package/doc/dist/doctrees/index.doctree +0 -0
- package/doc/dist/doctrees/memory.doctree +0 -0
- package/doc/dist/doctrees/platforms.doctree +0 -0
- package/doc/dist/doctrees/start.doctree +0 -0
- package/doc/dist/doctrees/types.doctree +0 -0
- package/doc/dist/html/_sources/benchmarks.md.txt +40 -36
- package/doc/dist/html/_sources/changes.md.txt +2 -0
- package/doc/dist/html/_sources/contribute.md.txt +16 -0
- package/doc/dist/html/_sources/functions.md.txt +18 -14
- package/doc/dist/html/_sources/index.rst.txt +2 -1
- package/doc/dist/html/_sources/memory.md.txt +6 -3
- package/doc/dist/html/_sources/platforms.md.txt +2 -0
- package/doc/dist/html/_sources/start.md.txt +3 -3
- package/doc/dist/html/_sources/types.md.txt +10 -8
- package/doc/dist/html/_static/perf_linux_20220623.png +0 -0
- package/doc/dist/html/_static/perf_linux_20220623_2.png +0 -0
- package/doc/dist/html/_static/perf_windows_20220623.png +0 -0
- package/doc/dist/html/_static/perf_windows_20220623_2.png +0 -0
- package/doc/dist/html/_static/pygments.css +54 -54
- package/doc/dist/html/benchmarks.html +52 -20
- package/doc/dist/html/changes.html +391 -0
- package/doc/dist/html/contribute.html +24 -2
- package/doc/dist/html/functions.html +83 -84
- package/doc/dist/html/genindex.html +1 -0
- package/doc/dist/html/index.html +18 -3
- package/doc/dist/html/memory.html +11 -5
- package/doc/dist/html/objects.inv +0 -0
- package/doc/dist/html/platforms.html +3 -1
- package/doc/dist/html/search.html +1 -0
- package/doc/dist/html/searchindex.js +1 -1
- package/doc/dist/html/start.html +48 -47
- package/doc/dist/html/types.html +161 -159
- package/doc/functions.md +18 -14
- package/doc/index.rst +2 -1
- package/doc/memory.md +6 -3
- package/doc/platforms.md +2 -0
- package/doc/start.md +3 -3
- package/doc/types.md +10 -8
- package/package.json +2 -2
- package/qemu/qemu.js +1 -0
- package/qemu/registry/machines.json +6 -11
- package/src/abi_arm32.cc +9 -9
- package/src/abi_arm64.cc +9 -9
- package/src/abi_riscv64.cc +9 -9
- package/src/abi_x64_sysv.cc +9 -9
- package/src/abi_x64_win.cc +9 -9
- package/src/abi_x86.cc +9 -9
- package/src/call.cc +8 -7
- package/src/call.hh +6 -0
- package/src/ffi.cc +73 -22
- package/src/ffi.hh +11 -4
- package/src/parser.cc +1 -1
- package/src/util.hh +21 -1
- package/test/async.js +1 -1
- package/test/misc.c +20 -0
- package/test/sync.js +13 -3
- package/vendor/libcc/libcc.hh +1 -1
- package/build/qemu/1.3.2/koffi_darwin_arm64.tar.gz +0 -0
- package/build/qemu/1.3.2/koffi_darwin_x64.tar.gz +0 -0
- package/build/qemu/1.3.2/koffi_freebsd_arm64.tar.gz +0 -0
- package/build/qemu/1.3.2/koffi_freebsd_ia32.tar.gz +0 -0
- package/build/qemu/1.3.2/koffi_freebsd_x64.tar.gz +0 -0
- package/build/qemu/1.3.2/koffi_linux_arm32hf.tar.gz +0 -0
- package/build/qemu/1.3.2/koffi_linux_arm64.tar.gz +0 -0
- package/build/qemu/1.3.2/koffi_linux_ia32.tar.gz +0 -0
- package/build/qemu/1.3.2/koffi_linux_riscv64hf64.tar.gz +0 -0
- package/build/qemu/1.3.2/koffi_linux_x64.tar.gz +0 -0
- package/build/qemu/1.3.2/koffi_openbsd_ia32.tar.gz +0 -0
- package/build/qemu/1.3.2/koffi_openbsd_x64.tar.gz +0 -0
- package/build/qemu/1.3.2/koffi_win32_arm64.tar.gz +0 -0
- package/build/qemu/1.3.2/koffi_win32_ia32.tar.gz +0 -0
- package/build/qemu/1.3.2/koffi_win32_x64.tar.gz +0 -0
package/CMakeLists.txt
CHANGED
|
@@ -14,6 +14,13 @@
|
|
|
14
14
|
cmake_minimum_required(VERSION 3.6)
|
|
15
15
|
project(koffi C CXX ASM)
|
|
16
16
|
|
|
17
|
+
include(CheckCXXCompilerFlag)
|
|
18
|
+
if(CMAKE_VERSION VERSION_GREATER_EQUAL "3.9.0")
|
|
19
|
+
cmake_policy(SET CMP0069 NEW)
|
|
20
|
+
include(CheckIPOSupported)
|
|
21
|
+
check_ipo_supported(RESULT USE_LTO)
|
|
22
|
+
endif()
|
|
23
|
+
|
|
17
24
|
find_package(CNoke)
|
|
18
25
|
|
|
19
26
|
if(CMAKE_VERSION VERSION_GREATER_EQUAL "3.8.0")
|
|
@@ -22,7 +29,7 @@ else()
|
|
|
22
29
|
set(CMAKE_CXX_STANDARD 14)
|
|
23
30
|
endif()
|
|
24
31
|
if(MSVC)
|
|
25
|
-
add_compile_options(/W4 /wd4200 /wd4458 /wd4706 /wd4100 /wd4127 /wd4702 /wd4201 /wd4324)
|
|
32
|
+
add_compile_options(/Zc:__cplusplus /W4 /wd4200 /wd4458 /wd4706 /wd4100 /wd4127 /wd4702 /wd4201 /wd4324)
|
|
26
33
|
|
|
27
34
|
# ASM_MASM does not (yet) work on Windows ARM64
|
|
28
35
|
if(NOT CMAKE_GENERATOR_PLATFORM MATCHES "ARM64")
|
|
@@ -47,10 +54,16 @@ set(KOFFI_SRC
|
|
|
47
54
|
if(CMAKE_SIZEOF_VOID_P EQUAL 8)
|
|
48
55
|
# CMAKE_SYSTEM_PROCESSOR is wrong on Windows ARM64
|
|
49
56
|
|
|
50
|
-
if(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch|arm" OR CMAKE_GENERATOR_PLATFORM STREQUAL "ARM64")
|
|
57
|
+
if(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch|arm" OR CMAKE_GENERATOR_PLATFORM STREQUAL "ARM64" OR CMAKE_OSX_ARCHITECTURES MATCHES "arm")
|
|
51
58
|
if(WIN32)
|
|
52
59
|
get_filename_component(cl_dir "${CMAKE_CXX_COMPILER}" DIRECTORY)
|
|
53
60
|
file(TO_CMAKE_PATH "${cl_dir}/armasm64.exe" asm_compiler)
|
|
61
|
+
|
|
62
|
+
# Work around missing ARM64-native ARMASM64 compiler (at least in VS 17.3 Preview 2)
|
|
63
|
+
if(NOT EXISTS "${asm_compiler}")
|
|
64
|
+
file(TO_CMAKE_PATH "${cl_dir}/../../Hostx64/arm64/armasm64.exe" asm_compiler)
|
|
65
|
+
endif()
|
|
66
|
+
|
|
54
67
|
message(STATUS "Using ARMASM64 compiler: ${asm_compiler}")
|
|
55
68
|
|
|
56
69
|
file(TO_CMAKE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/src/abi_arm64_fwd.asm" asm_source)
|
|
@@ -98,5 +111,16 @@ if(WIN32)
|
|
|
98
111
|
target_link_libraries(koffi PRIVATE ws2_32)
|
|
99
112
|
endif()
|
|
100
113
|
if(NOT MSVC)
|
|
101
|
-
|
|
114
|
+
# Restore C/C++ compiler sanity
|
|
115
|
+
|
|
116
|
+
target_compile_options(koffi PRIVATE -fno-exceptions -fno-strict-aliasing -fwrapv
|
|
117
|
+
-fno-delete-null-pointer-checks)
|
|
118
|
+
|
|
119
|
+
check_cxx_compiler_flag(-fno-finite-loops use_no_finite_loops)
|
|
120
|
+
if(use_no_finite_loops)
|
|
121
|
+
target_compile_options(koffi PRIVATE -fno-finite-loops)
|
|
122
|
+
endif()
|
|
123
|
+
endif()
|
|
124
|
+
if(USE_LTO)
|
|
125
|
+
set_target_properties(koffi PROPERTIES INTERPROCEDURAL_OPTIMIZATION TRUE)
|
|
102
126
|
endif()
|
package/ChangeLog.md
CHANGED
|
@@ -1,24 +1,56 @@
|
|
|
1
|
-
#
|
|
1
|
+
# Changelog
|
|
2
2
|
|
|
3
|
-
##
|
|
3
|
+
## Koffi 1.3.5
|
|
4
|
+
|
|
5
|
+
**Main changes:**
|
|
6
|
+
|
|
7
|
+
- Fix memory leak when many async calls are running
|
|
8
|
+
- Add configurable limit for maximum number of async calls (max_async_calls)
|
|
9
|
+
|
|
10
|
+
**Other changes:**
|
|
11
|
+
|
|
12
|
+
- Reduce default async memory stack and heap size
|
|
13
|
+
- Various documentation improvements
|
|
14
|
+
|
|
15
|
+
## Koffi 1.3.4
|
|
16
|
+
|
|
17
|
+
**Main fixes:**
|
|
18
|
+
|
|
19
|
+
- Fix possible OpenBSD i386 crash with `(void)` functions
|
|
20
|
+
|
|
21
|
+
## Koffi 1.3.3
|
|
22
|
+
|
|
23
|
+
**Main fixes:**
|
|
24
|
+
|
|
25
|
+
- Fix misconversion of signed integer return value as unsigned
|
|
26
|
+
|
|
27
|
+
**Other changes:**
|
|
28
|
+
|
|
29
|
+
- Support `(void)` (empty) function signatures
|
|
30
|
+
- Disable unsafe compiler optimizations
|
|
31
|
+
- Various documentation improvements
|
|
32
|
+
|
|
33
|
+
## Koffi 1.3.2
|
|
34
|
+
|
|
35
|
+
**Main fixes:**
|
|
4
36
|
|
|
5
37
|
- Support compilation in C++14 mode (graceful degradation)
|
|
6
38
|
- Support older toolchains on Linux (tested on Debian 9)
|
|
7
39
|
|
|
8
|
-
|
|
40
|
+
## Koffi 1.3.1
|
|
9
41
|
|
|
10
|
-
|
|
42
|
+
**Main fixes:**
|
|
11
43
|
|
|
12
44
|
- The prebuilt binary is tested when Koffi is installed, and a rebuild happens if it fails to load
|
|
13
45
|
|
|
14
|
-
|
|
46
|
+
## Koffi 1.3.0
|
|
15
47
|
|
|
16
|
-
|
|
48
|
+
**Major changes:**
|
|
17
49
|
|
|
18
50
|
- Expand and move documentation to https://koffi.dev/
|
|
19
51
|
- Support JS arrays and TypedArrays for pointer arguments (input, output and mixed)
|
|
20
52
|
|
|
21
|
-
|
|
53
|
+
**Other changes:**
|
|
22
54
|
|
|
23
55
|
- Convert NULL string pointers to null instead of crashing (return values, struct and array members, callbacks)
|
|
24
56
|
- Default to 'string' array hint for char, char16 and char16_t arrays
|
|
@@ -27,23 +59,23 @@
|
|
|
27
59
|
- Detect floating-point ABI before using prebuilt binaries (ARM32, RISC-V)
|
|
28
60
|
- Forbid duplicate member names in struct types
|
|
29
61
|
|
|
30
|
-
|
|
62
|
+
## Koffi 1.2.4
|
|
31
63
|
|
|
32
|
-
|
|
64
|
+
**New features:**
|
|
33
65
|
|
|
34
66
|
- Windows ARM64 is now supported
|
|
35
67
|
|
|
36
|
-
|
|
68
|
+
## Koffi 1.2.3
|
|
37
69
|
|
|
38
|
-
|
|
70
|
+
**New features:**
|
|
39
71
|
|
|
40
72
|
- A prebuilt binary for macOS ARM64 (M1) is now included
|
|
41
73
|
|
|
42
|
-
|
|
74
|
+
## Koffi 1.2.1
|
|
43
75
|
|
|
44
76
|
This entry documents changes since version 1.1.0.
|
|
45
77
|
|
|
46
|
-
|
|
78
|
+
**New features:**
|
|
47
79
|
|
|
48
80
|
- JS functions can be used as C callbacks (cdecl, stdcall) on all platforms
|
|
49
81
|
- RISC-V 64 LP64D ABI is supported (LP64 is untested)
|
|
@@ -51,7 +83,7 @@ This entry documents changes since version 1.1.0.
|
|
|
51
83
|
- Transparent conversion between C buffers and strings
|
|
52
84
|
- Tentative support for Windows ARM64 (untested)
|
|
53
85
|
|
|
54
|
-
|
|
86
|
+
**Main fixes:**
|
|
55
87
|
|
|
56
88
|
- Fix excessive stack alignment of structs on x86 platforms
|
|
57
89
|
- Fix potential problems with big int64_t/uint64_t values
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
package/doc/benchmarks.md
CHANGED
|
@@ -1,11 +1,15 @@
|
|
|
1
1
|
# Benchmarks
|
|
2
2
|
|
|
3
|
-
Here is a quick overview of the execution time of Koffi calls on three
|
|
3
|
+
Here is a quick overview of the execution time of Koffi calls on three benchmarks, where it is compared to a theoretical ideal FFI implementation (approximated with pre-compiled static N-API glue code):
|
|
4
|
+
|
|
5
|
+
- The first benchmark is based on `rand()` calls
|
|
6
|
+
- The second benchmark is based on `atoi()` calls
|
|
7
|
+
- The third benchmark is based on [Raylib](https://www.raylib.com/)
|
|
4
8
|
|
|
5
9
|
<table style="margin: 0 auto;">
|
|
6
10
|
<tr>
|
|
7
|
-
<td><img src="_static/
|
|
8
|
-
<td><img src="_static/
|
|
11
|
+
<td><a href="_static/perf_linux_20220623_2.png" target="_blank"><img src="_static/perf_linux_20220623_2.png" alt="Linux performance" style="width: 350px;"/></a></td>
|
|
12
|
+
<td><a href="_static/perf_windows_20220623_2.png" target="_blank"><img src="_static/perf_windows_20220623_2.png" alt="Windows performance" style="width: 350px;"/></a></td>
|
|
9
13
|
</tr>
|
|
10
14
|
</table>
|
|
11
15
|
|
|
@@ -15,7 +19,7 @@ These results are detailed and explained below, and compared to node-ffi/node-ff
|
|
|
15
19
|
|
|
16
20
|
This test is based around repeated calls to a simple standard C function atoi, and has three implementations:
|
|
17
21
|
|
|
18
|
-
- the first one is the reference, it calls atoi through an N-API module, and is close to the theoretical limit of a perfect (no overhead) Node.js > C FFI implementation
|
|
22
|
+
- the first one is the reference, it calls atoi through an N-API module, and is close to the theoretical limit of a perfect (no overhead) Node.js > C FFI implementation (pre-compiled static glue code)
|
|
19
23
|
- the second one calls atoi through Koffi
|
|
20
24
|
- the third one uses the official Node.js FFI implementation, node-ffi-napi
|
|
21
25
|
|
|
@@ -25,21 +29,21 @@ Because rand is a pretty small function, the FFI overhead is clearly visible.
|
|
|
25
29
|
|
|
26
30
|
The results below were measured on my x86_64 Linux machine (AMD® Ryzen™ 7 4700U):
|
|
27
31
|
|
|
28
|
-
Benchmark | Iterations | Total time | Overhead
|
|
29
|
-
------------- | ---------- | ----------- | ----------
|
|
30
|
-
rand_napi | 20000000 | 1.44s | (baseline)
|
|
31
|
-
rand_koffi | 20000000 | 2.60s |
|
|
32
|
-
rand_node_ffi | 20000000 | 107.58s |
|
|
32
|
+
Benchmark | Iterations | Total time | Relative performance | Overhead
|
|
33
|
+
------------- | ---------- | ----------- | -------------------- | ----------
|
|
34
|
+
rand_napi | 20000000 | 1.44s | (baseline) | (baseline)
|
|
35
|
+
rand_koffi | 20000000 | 2.60s | x0.55 | +81%
|
|
36
|
+
rand_node_ffi | 20000000 | 107.58s | x0.01 | +7400%
|
|
33
37
|
|
|
34
38
|
### Windows x86_64
|
|
35
39
|
|
|
36
40
|
The results below were measured on my x86_64 Windows machine (Intel® Core™ i5-4460):
|
|
37
41
|
|
|
38
|
-
Benchmark | Iterations | Total time | Overhead
|
|
39
|
-
------------- | ---------- | ----------- | ----------
|
|
40
|
-
rand_napi | 20000000 | 2.10s | (baseline)
|
|
41
|
-
rand_koffi | 20000000 | 3.87s |
|
|
42
|
-
rand_node_ffi | 20000000 | 87.84s |
|
|
42
|
+
Benchmark | Iterations | Total time | Relative performance | Overhead
|
|
43
|
+
------------- | ---------- | ----------- | -------------------- | ----------
|
|
44
|
+
rand_napi | 20000000 | 2.10s | (baseline) | (baseline)
|
|
45
|
+
rand_koffi | 20000000 | 3.87s | x0.54 | +84%
|
|
46
|
+
rand_node_ffi | 20000000 | 87.84s | x0.02 | +4100%
|
|
43
47
|
|
|
44
48
|
## atoi results
|
|
45
49
|
|
|
@@ -51,21 +55,21 @@ Because rand is a pretty small function, the FFI overhead is clearly visible.
|
|
|
51
55
|
|
|
52
56
|
The results below were measured on my x86_64 Linux machine (AMD® Ryzen™ 7 4700U):
|
|
53
57
|
|
|
54
|
-
Benchmark | Iterations | Total time | Overhead
|
|
55
|
-
------------- | ---------- | ----------- | ----------
|
|
56
|
-
atoi_napi | 20000000 | 2.97s | (baseline)
|
|
57
|
-
atoi_koffi | 20000000 | 5.07s |
|
|
58
|
-
atoi_node_ffi | 20000000 | 693.16s |
|
|
58
|
+
Benchmark | Iterations | Total time | Relative performance | Overhead
|
|
59
|
+
------------- | ---------- | ----------- | -------------------- | ----------
|
|
60
|
+
atoi_napi | 20000000 | 2.97s | (baseline) | (baseline)
|
|
61
|
+
atoi_koffi | 20000000 | 5.07s | x0.58 | +71%
|
|
62
|
+
atoi_node_ffi | 20000000 | 693.16s | x0.005 | +23000%
|
|
59
63
|
|
|
60
64
|
### Windows x86_64
|
|
61
65
|
|
|
62
66
|
The results below were measured on my x86_64 Windows machine (Intel® Core™ i5-4460):
|
|
63
67
|
|
|
64
|
-
Benchmark | Iterations | Total time | Overhead
|
|
65
|
-
------------- | ---------- | ----------- | ----------
|
|
66
|
-
atoi_napi | 20000000 | 2.97s | (baseline)
|
|
67
|
-
atoi_koffi | 20000000 | 5.91s |
|
|
68
|
-
atoi_node_ffi | 20000000 | 479.34s |
|
|
68
|
+
Benchmark | Iterations | Total time | Relative performance | Overhead
|
|
69
|
+
------------- | ---------- | ----------- | -------------------- | ----------
|
|
70
|
+
atoi_napi | 20000000 | 2.97s | (baseline) | (baseline)
|
|
71
|
+
atoi_koffi | 20000000 | 5.91s | x0.50 | +99%
|
|
72
|
+
atoi_node_ffi | 20000000 | 479.34s | x0.006 | +16000%
|
|
69
73
|
|
|
70
74
|
## Raylib results
|
|
71
75
|
|
|
@@ -78,23 +82,23 @@ This benchmark uses the CPU-based image drawing functions in Raylib. The calls a
|
|
|
78
82
|
|
|
79
83
|
The results below were measured on my x86_64 Linux machine (AMD® Ryzen™ 7 4700U):
|
|
80
84
|
|
|
81
|
-
Benchmark | Iterations | Total time | Overhead
|
|
82
|
-
--------------- | ---------- | ----------- | ----------
|
|
83
|
-
raylib_cc | 100 | 9.31s |
|
|
84
|
-
raylib_node_raylib | 100 | 10.90s |
|
|
85
|
-
raylib_koffi | 100 | 12.86s |
|
|
86
|
-
raylib_node_ffi | 100 | 35.76s |
|
|
85
|
+
Benchmark | Iterations | Total time | Relative performance | Overhead
|
|
86
|
+
--------------- | ---------- | ----------- | -------------------- | ----------
|
|
87
|
+
raylib_cc | 100 | 9.31s | x1.17 | -15%
|
|
88
|
+
raylib_node_raylib | 100 | 10.90s | (baseline) | (baseline)
|
|
89
|
+
raylib_koffi | 100 | 12.86s | x0.84 | +18%
|
|
90
|
+
raylib_node_ffi | 100 | 35.76s | x0.30 | +228%
|
|
87
91
|
|
|
88
92
|
### Windows x86_64
|
|
89
93
|
|
|
90
94
|
The results below were measured on my x86_64 Windows machine (Intel® Core™ i5-4460):
|
|
91
95
|
|
|
92
|
-
Benchmark | Iterations | Total time | Overhead
|
|
93
|
-
--------------- | ---------- | ----------- | ----------
|
|
94
|
-
raylib_cc | 100 | 10.67s |
|
|
95
|
-
raylib_node_raylib | 100 | 12.05s |
|
|
96
|
-
raylib_koffi | 100 | 14.84s |
|
|
97
|
-
raylib_node_ffi | 100 | 44.63s |
|
|
96
|
+
Benchmark | Iterations | Total time | Relative performance | Overhead
|
|
97
|
+
--------------- | ---------- | ----------- | -------------------- | ----------
|
|
98
|
+
raylib_cc | 100 | 10.67s | x1.13 | -12%
|
|
99
|
+
raylib_node_raylib | 100 | 12.05s | (baseline) | (baseline)
|
|
100
|
+
raylib_koffi | 100 | 14.84s | x0.81 | +23%
|
|
101
|
+
raylib_node_ffi | 100 | 44.63s | x0.27 | +270%
|
|
98
102
|
|
|
99
103
|
## Running benchmarks
|
|
100
104
|
|
package/doc/benchmarks.xlsx
CHANGED
|
Binary file
|
package/doc/changes.md
ADDED
package/doc/conf.py
CHANGED
|
@@ -1,10 +1,17 @@
|
|
|
1
|
+
import json
|
|
2
|
+
import os
|
|
3
|
+
|
|
1
4
|
# -- Project information -----------------------------------------------------
|
|
2
5
|
|
|
3
6
|
project = 'Koffi'
|
|
4
7
|
copyright = '2022, Niels Martignène'
|
|
5
8
|
author = 'Niels Martignène'
|
|
6
|
-
|
|
7
|
-
|
|
9
|
+
|
|
10
|
+
with open(os.path.dirname(__file__) + '/../package.json') as f:
|
|
11
|
+
config = json.load(f)
|
|
12
|
+
|
|
13
|
+
version = config['version']
|
|
14
|
+
revision = config['version']
|
|
8
15
|
|
|
9
16
|
# -- General configuration ---------------------------------------------------
|
|
10
17
|
|
|
@@ -20,7 +27,7 @@ exclude_patterns = []
|
|
|
20
27
|
|
|
21
28
|
# -- Options for HTML output -------------------------------------------------
|
|
22
29
|
|
|
23
|
-
html_title =
|
|
30
|
+
html_title = project
|
|
24
31
|
|
|
25
32
|
html_theme = 'furo'
|
|
26
33
|
|
package/doc/contribute.md
CHANGED
|
@@ -10,6 +10,15 @@ Go here: https://github.com/Koromix/luigi/issues
|
|
|
10
10
|
|
|
11
11
|
We provide prebuilt binaries, packaged in the NPM archive, so in most cases it should be as simple as `npm install koffi`. If you want to hack Koffi or use a specific platform, follow the instructions below.
|
|
12
12
|
|
|
13
|
+
Start by cloning the repository with [Git](https://git-scm.com/):
|
|
14
|
+
|
|
15
|
+
```sh
|
|
16
|
+
git clone https://github.com/Koromix/luigi
|
|
17
|
+
cd luigi/koffi
|
|
18
|
+
```
|
|
19
|
+
|
|
20
|
+
As said before, this is a monorepository containing multiple projects, hence the name.
|
|
21
|
+
|
|
13
22
|
### Windows
|
|
14
23
|
|
|
15
24
|
First, make sure the following dependencies are met:
|
|
@@ -113,3 +122,10 @@ The following features are also planned eventually, not necessarily in that orde
|
|
|
113
122
|
- Add support for unions
|
|
114
123
|
- Provide better ways to automatically deal with caller/heap-allocated memory (strings, etc.)
|
|
115
124
|
- Port Koffi to PowerPC (POWER9+) ABI
|
|
125
|
+
- Fix assembly unwind and CFI directives for better debugging experience
|
|
126
|
+
|
|
127
|
+
## Code style
|
|
128
|
+
|
|
129
|
+
Koffi is programmed in a mix of C++ and assembly code (architecture-specific code). It uses [node-addon-api](https://github.com/nodejs/node-addon-api) (C++ N-API wrapper) to interact with Node.js.
|
|
130
|
+
|
|
131
|
+
My personal preference goes to a rather C-like C++ style, with careful use of templates (mainly for containers) and little object-oriented programming. I strongly prefer tagged unions and code locality over inheritance and virtual methods. Exceptions are disabled.
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
@@ -1,11 +1,15 @@
|
|
|
1
1
|
# Benchmarks
|
|
2
2
|
|
|
3
|
-
Here is a quick overview of the execution time of Koffi calls on three
|
|
3
|
+
Here is a quick overview of the execution time of Koffi calls on three benchmarks, where it is compared to a theoretical ideal FFI implementation (approximated with pre-compiled static N-API glue code):
|
|
4
|
+
|
|
5
|
+
- The first benchmark is based on `rand()` calls
|
|
6
|
+
- The second benchmark is based on `atoi()` calls
|
|
7
|
+
- The third benchmark is based on [Raylib](https://www.raylib.com/)
|
|
4
8
|
|
|
5
9
|
<table style="margin: 0 auto;">
|
|
6
10
|
<tr>
|
|
7
|
-
<td><img src="_static/
|
|
8
|
-
<td><img src="_static/
|
|
11
|
+
<td><a href="_static/perf_linux_20220623_2.png" target="_blank"><img src="_static/perf_linux_20220623_2.png" alt="Linux performance" style="width: 350px;"/></a></td>
|
|
12
|
+
<td><a href="_static/perf_windows_20220623_2.png" target="_blank"><img src="_static/perf_windows_20220623_2.png" alt="Windows performance" style="width: 350px;"/></a></td>
|
|
9
13
|
</tr>
|
|
10
14
|
</table>
|
|
11
15
|
|
|
@@ -15,7 +19,7 @@ These results are detailed and explained below, and compared to node-ffi/node-ff
|
|
|
15
19
|
|
|
16
20
|
This test is based around repeated calls to a simple standard C function atoi, and has three implementations:
|
|
17
21
|
|
|
18
|
-
- the first one is the reference, it calls atoi through an N-API module, and is close to the theoretical limit of a perfect (no overhead) Node.js > C FFI implementation
|
|
22
|
+
- the first one is the reference, it calls atoi through an N-API module, and is close to the theoretical limit of a perfect (no overhead) Node.js > C FFI implementation (pre-compiled static glue code)
|
|
19
23
|
- the second one calls atoi through Koffi
|
|
20
24
|
- the third one uses the official Node.js FFI implementation, node-ffi-napi
|
|
21
25
|
|
|
@@ -25,21 +29,21 @@ Because rand is a pretty small function, the FFI overhead is clearly visible.
|
|
|
25
29
|
|
|
26
30
|
The results below were measured on my x86_64 Linux machine (AMD® Ryzen™ 7 4700U):
|
|
27
31
|
|
|
28
|
-
Benchmark | Iterations | Total time | Overhead
|
|
29
|
-
------------- | ---------- | ----------- | ----------
|
|
30
|
-
rand_napi | 20000000 | 1.44s | (baseline)
|
|
31
|
-
rand_koffi | 20000000 | 2.60s |
|
|
32
|
-
rand_node_ffi | 20000000 | 107.58s |
|
|
32
|
+
Benchmark | Iterations | Total time | Relative performance | Overhead
|
|
33
|
+
------------- | ---------- | ----------- | -------------------- | ----------
|
|
34
|
+
rand_napi | 20000000 | 1.44s | (baseline) | (baseline)
|
|
35
|
+
rand_koffi | 20000000 | 2.60s | x0.55 | +81%
|
|
36
|
+
rand_node_ffi | 20000000 | 107.58s | x0.01 | +7400%
|
|
33
37
|
|
|
34
38
|
### Windows x86_64
|
|
35
39
|
|
|
36
40
|
The results below were measured on my x86_64 Windows machine (Intel® Core™ i5-4460):
|
|
37
41
|
|
|
38
|
-
Benchmark | Iterations | Total time | Overhead
|
|
39
|
-
------------- | ---------- | ----------- | ----------
|
|
40
|
-
rand_napi | 20000000 | 2.10s | (baseline)
|
|
41
|
-
rand_koffi | 20000000 | 3.87s |
|
|
42
|
-
rand_node_ffi | 20000000 | 87.84s |
|
|
42
|
+
Benchmark | Iterations | Total time | Relative performance | Overhead
|
|
43
|
+
------------- | ---------- | ----------- | -------------------- | ----------
|
|
44
|
+
rand_napi | 20000000 | 2.10s | (baseline) | (baseline)
|
|
45
|
+
rand_koffi | 20000000 | 3.87s | x0.54 | +84%
|
|
46
|
+
rand_node_ffi | 20000000 | 87.84s | x0.02 | +4100%
|
|
43
47
|
|
|
44
48
|
## atoi results
|
|
45
49
|
|
|
@@ -51,21 +55,21 @@ Because rand is a pretty small function, the FFI overhead is clearly visible.
|
|
|
51
55
|
|
|
52
56
|
The results below were measured on my x86_64 Linux machine (AMD® Ryzen™ 7 4700U):
|
|
53
57
|
|
|
54
|
-
Benchmark | Iterations | Total time | Overhead
|
|
55
|
-
------------- | ---------- | ----------- | ----------
|
|
56
|
-
atoi_napi | 20000000 | 2.97s | (baseline)
|
|
57
|
-
atoi_koffi | 20000000 | 5.07s |
|
|
58
|
-
atoi_node_ffi | 20000000 | 693.16s |
|
|
58
|
+
Benchmark | Iterations | Total time | Relative performance | Overhead
|
|
59
|
+
------------- | ---------- | ----------- | -------------------- | ----------
|
|
60
|
+
atoi_napi | 20000000 | 2.97s | (baseline) | (baseline)
|
|
61
|
+
atoi_koffi | 20000000 | 5.07s | x0.58 | +71%
|
|
62
|
+
atoi_node_ffi | 20000000 | 693.16s | x0.005 | +23000%
|
|
59
63
|
|
|
60
64
|
### Windows x86_64
|
|
61
65
|
|
|
62
66
|
The results below were measured on my x86_64 Windows machine (Intel® Core™ i5-4460):
|
|
63
67
|
|
|
64
|
-
Benchmark | Iterations | Total time | Overhead
|
|
65
|
-
------------- | ---------- | ----------- | ----------
|
|
66
|
-
atoi_napi | 20000000 | 2.97s | (baseline)
|
|
67
|
-
atoi_koffi | 20000000 | 5.91s |
|
|
68
|
-
atoi_node_ffi | 20000000 | 479.34s |
|
|
68
|
+
Benchmark | Iterations | Total time | Relative performance | Overhead
|
|
69
|
+
------------- | ---------- | ----------- | -------------------- | ----------
|
|
70
|
+
atoi_napi | 20000000 | 2.97s | (baseline) | (baseline)
|
|
71
|
+
atoi_koffi | 20000000 | 5.91s | x0.50 | +99%
|
|
72
|
+
atoi_node_ffi | 20000000 | 479.34s | x0.006 | +16000%
|
|
69
73
|
|
|
70
74
|
## Raylib results
|
|
71
75
|
|
|
@@ -78,23 +82,23 @@ This benchmark uses the CPU-based image drawing functions in Raylib. The calls a
|
|
|
78
82
|
|
|
79
83
|
The results below were measured on my x86_64 Linux machine (AMD® Ryzen™ 7 4700U):
|
|
80
84
|
|
|
81
|
-
Benchmark | Iterations | Total time | Overhead
|
|
82
|
-
--------------- | ---------- | ----------- | ----------
|
|
83
|
-
raylib_cc | 100 | 9.31s |
|
|
84
|
-
raylib_node_raylib | 100 | 10.90s |
|
|
85
|
-
raylib_koffi | 100 | 12.86s |
|
|
86
|
-
raylib_node_ffi | 100 | 35.76s |
|
|
85
|
+
Benchmark | Iterations | Total time | Relative performance | Overhead
|
|
86
|
+
--------------- | ---------- | ----------- | -------------------- | ----------
|
|
87
|
+
raylib_cc | 100 | 9.31s | x1.17 | -15%
|
|
88
|
+
raylib_node_raylib | 100 | 10.90s | (baseline) | (baseline)
|
|
89
|
+
raylib_koffi | 100 | 12.86s | x0.84 | +18%
|
|
90
|
+
raylib_node_ffi | 100 | 35.76s | x0.30 | +228%
|
|
87
91
|
|
|
88
92
|
### Windows x86_64
|
|
89
93
|
|
|
90
94
|
The results below were measured on my x86_64 Windows machine (Intel® Core™ i5-4460):
|
|
91
95
|
|
|
92
|
-
Benchmark | Iterations | Total time | Overhead
|
|
93
|
-
--------------- | ---------- | ----------- | ----------
|
|
94
|
-
raylib_cc | 100 | 10.67s |
|
|
95
|
-
raylib_node_raylib | 100 | 12.05s |
|
|
96
|
-
raylib_koffi | 100 | 14.84s |
|
|
97
|
-
raylib_node_ffi | 100 | 44.63s |
|
|
96
|
+
Benchmark | Iterations | Total time | Relative performance | Overhead
|
|
97
|
+
--------------- | ---------- | ----------- | -------------------- | ----------
|
|
98
|
+
raylib_cc | 100 | 10.67s | x1.13 | -12%
|
|
99
|
+
raylib_node_raylib | 100 | 12.05s | (baseline) | (baseline)
|
|
100
|
+
raylib_koffi | 100 | 14.84s | x0.81 | +23%
|
|
101
|
+
raylib_node_ffi | 100 | 44.63s | x0.27 | +270%
|
|
98
102
|
|
|
99
103
|
## Running benchmarks
|
|
100
104
|
|
|
@@ -10,6 +10,15 @@ Go here: https://github.com/Koromix/luigi/issues
|
|
|
10
10
|
|
|
11
11
|
We provide prebuilt binaries, packaged in the NPM archive, so in most cases it should be as simple as `npm install koffi`. If you want to hack Koffi or use a specific platform, follow the instructions below.
|
|
12
12
|
|
|
13
|
+
Start by cloning the repository with [Git](https://git-scm.com/):
|
|
14
|
+
|
|
15
|
+
```sh
|
|
16
|
+
git clone https://github.com/Koromix/luigi
|
|
17
|
+
cd luigi/koffi
|
|
18
|
+
```
|
|
19
|
+
|
|
20
|
+
As said before, this is a monorepository containing multiple projects, hence the name.
|
|
21
|
+
|
|
13
22
|
### Windows
|
|
14
23
|
|
|
15
24
|
First, make sure the following dependencies are met:
|
|
@@ -113,3 +122,10 @@ The following features are also planned eventually, not necessarily in that orde
|
|
|
113
122
|
- Add support for unions
|
|
114
123
|
- Provide better ways to automatically deal with caller/heap-allocated memory (strings, etc.)
|
|
115
124
|
- Port Koffi to PowerPC (POWER9+) ABI
|
|
125
|
+
- Fix assembly unwind and CFI directives for better debugging experience
|
|
126
|
+
|
|
127
|
+
## Code style
|
|
128
|
+
|
|
129
|
+
Koffi is programmed in a mix of C++ and assembly code (architecture-specific code). It uses [node-addon-api](https://github.com/nodejs/node-addon-api) (C++ N-API wrapper) to interact with Node.js.
|
|
130
|
+
|
|
131
|
+
My personal preference goes to a rather C-like C++ style, with careful use of templates (mainly for containers) and little object-oriented programming. I strongly prefer tagged unions and code locality over inheritance and virtual methods. Exceptions are disabled.
|