xnd 0.2.0dev3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CONTRIBUTING.md +42 -0
- data/Gemfile +3 -0
- data/History.md +0 -0
- data/README.md +7 -0
- data/Rakefile +135 -0
- data/ext/ruby_xnd/extconf.rb +70 -0
- data/ext/ruby_xnd/float_pack_unpack.c +277 -0
- data/ext/ruby_xnd/float_pack_unpack.h +39 -0
- data/ext/ruby_xnd/gc_guard.c +36 -0
- data/ext/ruby_xnd/gc_guard.h +12 -0
- data/ext/ruby_xnd/include/xnd.h +449 -0
- data/ext/ruby_xnd/lib/libxnd.a +0 -0
- data/ext/ruby_xnd/lib/libxnd.so +1 -0
- data/ext/ruby_xnd/lib/libxnd.so.0 +1 -0
- data/ext/ruby_xnd/lib/libxnd.so.0.2.0dev3 +0 -0
- data/ext/ruby_xnd/memory_block_object.c +32 -0
- data/ext/ruby_xnd/memory_block_object.h +33 -0
- data/ext/ruby_xnd/ruby_xnd.c +1953 -0
- data/ext/ruby_xnd/ruby_xnd.h +61 -0
- data/ext/ruby_xnd/ruby_xnd_internal.h +85 -0
- data/ext/ruby_xnd/util.h +170 -0
- data/ext/ruby_xnd/xnd/AUTHORS.txt +5 -0
- data/ext/ruby_xnd/xnd/INSTALL.txt +134 -0
- data/ext/ruby_xnd/xnd/LICENSE.txt +29 -0
- data/ext/ruby_xnd/xnd/MANIFEST.in +3 -0
- data/ext/ruby_xnd/xnd/Makefile.in +80 -0
- data/ext/ruby_xnd/xnd/README.rst +44 -0
- data/ext/ruby_xnd/xnd/config.guess +1530 -0
- data/ext/ruby_xnd/xnd/config.h.in +22 -0
- data/ext/ruby_xnd/xnd/config.sub +1782 -0
- data/ext/ruby_xnd/xnd/configure +4867 -0
- data/ext/ruby_xnd/xnd/configure.ac +164 -0
- data/ext/ruby_xnd/xnd/doc/Makefile +14 -0
- data/ext/ruby_xnd/xnd/doc/_static/copybutton.js +66 -0
- data/ext/ruby_xnd/xnd/doc/conf.py +26 -0
- data/ext/ruby_xnd/xnd/doc/index.rst +44 -0
- data/ext/ruby_xnd/xnd/doc/libxnd/data-structures.rst +186 -0
- data/ext/ruby_xnd/xnd/doc/libxnd/functions.rst +148 -0
- data/ext/ruby_xnd/xnd/doc/libxnd/index.rst +25 -0
- data/ext/ruby_xnd/xnd/doc/releases/index.rst +34 -0
- data/ext/ruby_xnd/xnd/doc/xnd/align-pack.rst +96 -0
- data/ext/ruby_xnd/xnd/doc/xnd/buffer-protocol.rst +42 -0
- data/ext/ruby_xnd/xnd/doc/xnd/index.rst +30 -0
- data/ext/ruby_xnd/xnd/doc/xnd/quickstart.rst +62 -0
- data/ext/ruby_xnd/xnd/doc/xnd/types.rst +674 -0
- data/ext/ruby_xnd/xnd/install-sh +527 -0
- data/ext/ruby_xnd/xnd/libxnd/Makefile.in +102 -0
- data/ext/ruby_xnd/xnd/libxnd/Makefile.vc +112 -0
- data/ext/ruby_xnd/xnd/libxnd/bitmaps.c +345 -0
- data/ext/ruby_xnd/xnd/libxnd/contrib.h +313 -0
- data/ext/ruby_xnd/xnd/libxnd/copy.c +944 -0
- data/ext/ruby_xnd/xnd/libxnd/equal.c +1216 -0
- data/ext/ruby_xnd/xnd/libxnd/inline.h +154 -0
- data/ext/ruby_xnd/xnd/libxnd/overflow.h +147 -0
- data/ext/ruby_xnd/xnd/libxnd/split.c +286 -0
- data/ext/ruby_xnd/xnd/libxnd/tests/Makefile.in +39 -0
- data/ext/ruby_xnd/xnd/libxnd/tests/Makefile.vc +44 -0
- data/ext/ruby_xnd/xnd/libxnd/tests/README.txt +2 -0
- data/ext/ruby_xnd/xnd/libxnd/tests/runtest.c +101 -0
- data/ext/ruby_xnd/xnd/libxnd/tests/test.h +48 -0
- data/ext/ruby_xnd/xnd/libxnd/tests/test_fixed.c +108 -0
- data/ext/ruby_xnd/xnd/libxnd/xnd.c +1304 -0
- data/ext/ruby_xnd/xnd/libxnd/xnd.h +449 -0
- data/ext/ruby_xnd/xnd/python/test_xnd.py +3144 -0
- data/ext/ruby_xnd/xnd/python/xnd/__init__.py +290 -0
- data/ext/ruby_xnd/xnd/python/xnd/_xnd.c +2822 -0
- data/ext/ruby_xnd/xnd/python/xnd/contrib/pretty.py +850 -0
- data/ext/ruby_xnd/xnd/python/xnd/docstrings.h +129 -0
- data/ext/ruby_xnd/xnd/python/xnd/pyxnd.h +200 -0
- data/ext/ruby_xnd/xnd/python/xnd/util.h +182 -0
- data/ext/ruby_xnd/xnd/python/xnd_randvalue.py +1121 -0
- data/ext/ruby_xnd/xnd/python/xnd_support.py +106 -0
- data/ext/ruby_xnd/xnd/setup.py +303 -0
- data/ext/ruby_xnd/xnd/vcbuild/INSTALL.txt +42 -0
- data/ext/ruby_xnd/xnd/vcbuild/runtest32.bat +16 -0
- data/ext/ruby_xnd/xnd/vcbuild/runtest64.bat +14 -0
- data/ext/ruby_xnd/xnd/vcbuild/vcbuild32.bat +29 -0
- data/ext/ruby_xnd/xnd/vcbuild/vcbuild64.bat +29 -0
- data/ext/ruby_xnd/xnd/vcbuild/vcclean.bat +13 -0
- data/ext/ruby_xnd/xnd/vcbuild/vcdistclean.bat +14 -0
- data/lib/ruby_xnd.so +0 -0
- data/lib/xnd.rb +306 -0
- data/lib/xnd/monkeys.rb +29 -0
- data/lib/xnd/version.rb +6 -0
- data/spec/debug_spec.rb +9 -0
- data/spec/gc_guard_spec.rb +10 -0
- data/spec/leakcheck.rb +9 -0
- data/spec/spec_helper.rb +877 -0
- data/spec/type_inference_spec.rb +81 -0
- data/spec/xnd_spec.rb +2921 -0
- data/xnd.gemspec +47 -0
- metadata +215 -0
|
@@ -0,0 +1,148 @@
|
|
|
1
|
+
.. meta::
|
|
2
|
+
:robots: index,follow
|
|
3
|
+
:description: libxnd documentation
|
|
4
|
+
|
|
5
|
+
.. sectionauthor:: Stefan Krah <skrah at bytereef.org>
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
Functions
|
|
9
|
+
=========
|
|
10
|
+
|
|
11
|
+
Create typed memory blocks
|
|
12
|
+
--------------------------
|
|
13
|
+
|
|
14
|
+
The main use case for libxnd is to create and manage typed memory blocks.
|
|
15
|
+
These blocks are fully initialized to *0*. References to additional memory
|
|
16
|
+
blocks are allocated and initialized recursively.
|
|
17
|
+
|
|
18
|
+
*bytes* and *string* types are initialized to :c:macro:`NULL`, since their
|
|
19
|
+
actual length is not known yet.
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
.. topic:: xnd_empty_from_string
|
|
23
|
+
|
|
24
|
+
.. code-block:: c
|
|
25
|
+
|
|
26
|
+
xnd_master_t *xnd_empty_from_string(const char *s, uint32_t flags, ndt_context_t *ctx);
|
|
27
|
+
|
|
28
|
+
Return a new master buffer according to the type string in *s*. *flags*
|
|
29
|
+
must include :c:macro:`XND_OWN_TYPE`.
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
.. topic:: xnd_empty_from_type
|
|
33
|
+
|
|
34
|
+
.. code-block:: c
|
|
35
|
+
|
|
36
|
+
xnd_master_t *xnd_empty_from_type(const ndt_t *t, uint32_t flags, ndt_context_t *ctx);
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
Return a new master buffer according to the type *t*. *flags* must not include
|
|
40
|
+
:c:macro:`XND_OWN_TYPE`, i.e. the type is externally managed.
|
|
41
|
+
|
|
42
|
+
This is the case in the Python bindings, where the ndtypes module creates
|
|
43
|
+
and manages types.
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
Delete typed memory blocks
|
|
47
|
+
--------------------------
|
|
48
|
+
|
|
49
|
+
.. topic:: xnd_del
|
|
50
|
+
|
|
51
|
+
.. code-block:: c
|
|
52
|
+
|
|
53
|
+
void xnd_del(xnd_master_t *x);
|
|
54
|
+
|
|
55
|
+
Delete the master buffer according to its flags. *x* may be :c:macro:`NULL`.
|
|
56
|
+
*x->master.ptr* and *x->master.type* may be :c:macro:`NULL`.
|
|
57
|
+
|
|
58
|
+
The latter situation should only arise when breaking up reference cycles.
|
|
59
|
+
This is used in the Python module.
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
Bitmaps
|
|
63
|
+
-------
|
|
64
|
+
|
|
65
|
+
.. topic:: xnd_bitmap_next
|
|
66
|
+
|
|
67
|
+
.. code-block:: c
|
|
68
|
+
|
|
69
|
+
xnd_bitmap_t xnd_bitmap_next(const xnd_t *x, int64_t i, ndt_context_t *ctx);
|
|
70
|
+
|
|
71
|
+
Get the next bitmap for the *Tuple*, *Record*, *Ref* and *Constr* types.
|
|
72
|
+
|
|
73
|
+
This is a convenience function that checks if the types have optional
|
|
74
|
+
subtrees.
|
|
75
|
+
|
|
76
|
+
If yes, return the bitmap at index *i*. If not, it returns an empty bitmap
|
|
77
|
+
that must not be accessed.
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
.. topic:: xnd_set_valid
|
|
81
|
+
|
|
82
|
+
.. code-block:: c
|
|
83
|
+
|
|
84
|
+
void xnd_set_valid(xnd_t *x);
|
|
85
|
+
|
|
86
|
+
Set the validity bit at *x->index*. *x* must have an optional type.
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
.. topic:: xnd_set_na
|
|
90
|
+
|
|
91
|
+
.. code-block:: c
|
|
92
|
+
|
|
93
|
+
void xnd_set_na(xnd_t *x);
|
|
94
|
+
|
|
95
|
+
Clear the validity bit at *x->index*. *x* must have an optional type.
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
.. topic:: xnd_is_valid
|
|
99
|
+
|
|
100
|
+
.. code-block:: c
|
|
101
|
+
|
|
102
|
+
int xnd_is_valid(const xnd_t *x);
|
|
103
|
+
|
|
104
|
+
Check if the element at *x->index* is valid. If *x* does not have an optional
|
|
105
|
+
type, return *1*. Otherwise, return the validity bit (zero or nonzero).
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
.. topic:: xnd_is_na
|
|
109
|
+
|
|
110
|
+
.. code-block:: c
|
|
111
|
+
|
|
112
|
+
int xnd_is_na(const xnd_t *x);
|
|
113
|
+
|
|
114
|
+
Check if the element at *x->index* is missing (NA). If *x* does not have an optional
|
|
115
|
+
type, return *0*. Otherwise, return the negation of the validity bit.
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
.. topic:: xnd_subtree
|
|
119
|
+
|
|
120
|
+
.. code-block:: c
|
|
121
|
+
|
|
122
|
+
xnd_t xnd_subtree(const xnd_t *x, const xnd_index_t indices[], int len,
|
|
123
|
+
ndt_context_t *ctx);
|
|
124
|
+
|
|
125
|
+
Apply zero or more indices to the input *x* and return a typed view. Valid
|
|
126
|
+
indices are integers or strings for record fields.
|
|
127
|
+
|
|
128
|
+
This function is more general than pure array indexing, hence the name. For
|
|
129
|
+
example, it is possible to index into nested records that in turn contain
|
|
130
|
+
arrays.
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
.. topic:: xnd_multikey
|
|
134
|
+
|
|
135
|
+
.. code-block:: c
|
|
136
|
+
|
|
137
|
+
xnd_t xnd_multikey(const xnd_t *x, const xnd_index_t indices[], int len,
|
|
138
|
+
ndt_context_t *ctx);
|
|
139
|
+
|
|
140
|
+
Apply zero or more keys to the input *x* and return a typed view. Valid
|
|
141
|
+
keys are integers or slices.
|
|
142
|
+
|
|
143
|
+
This function differs from :c:func:`xnd_subtree` in that it allows
|
|
144
|
+
mixed indexing and slicing for fixed dimensions. Records and tuples
|
|
145
|
+
cannot be sliced.
|
|
146
|
+
|
|
147
|
+
Variable dimensions can be sliced, but do not support mixed indexing
|
|
148
|
+
and slicing.
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
.. meta::
|
|
2
|
+
:robots: index, follow
|
|
3
|
+
:description: libxnd documentation
|
|
4
|
+
:keywords: libxnd, C, array computing
|
|
5
|
+
|
|
6
|
+
.. sectionauthor:: Stefan Krah <skrah at bytereef.org>
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
libxnd
|
|
10
|
+
------
|
|
11
|
+
|
|
12
|
+
libxnd implements support for typed memory blocks using the libndtypes
|
|
13
|
+
type library.
|
|
14
|
+
|
|
15
|
+
Types include ndarrays, ragged arrays (compatible with the Arrow list type),
|
|
16
|
+
optional data (bitmaps are compatible with Arrow), tuples, records (structs),
|
|
17
|
+
strings, bytes and categorical values.
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
.. toctree::
|
|
21
|
+
|
|
22
|
+
data-structures.rst
|
|
23
|
+
functions.rst
|
|
24
|
+
|
|
25
|
+
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
.. meta::
|
|
2
|
+
:robots: index, follow
|
|
3
|
+
:description: xnd documentation
|
|
4
|
+
:keywords: xnd, C, array computing
|
|
5
|
+
|
|
6
|
+
.. sectionauthor:: Stefan Krah <skrah at bytereef.org>
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
========
|
|
10
|
+
Releases
|
|
11
|
+
========
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
v0.2.0b2 (February 5th 2018)
|
|
15
|
+
============================
|
|
16
|
+
|
|
17
|
+
Second release (beta2). This release addresses several build and packaging issues:
|
|
18
|
+
|
|
19
|
+
- Avoid copying libraries into the Python package if system libraries are used.
|
|
20
|
+
|
|
21
|
+
- The build and install partially relied on the dev setup (ndtypes checked out
|
|
22
|
+
in the xnd directory). This dependency has been removed.
|
|
23
|
+
|
|
24
|
+
- The conda build now supports separate library and Python module installs.
|
|
25
|
+
|
|
26
|
+
- Configure now has a **--without-docs** option for skipping the doc install.
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
v0.2.0b1 (January 20th 2018)
|
|
30
|
+
============================
|
|
31
|
+
|
|
32
|
+
First release (beta1).
|
|
33
|
+
|
|
34
|
+
|
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
.. meta::
|
|
2
|
+
:robots: index,follow
|
|
3
|
+
:description: xnd container
|
|
4
|
+
:keywords: xnd, alignment, packing
|
|
5
|
+
|
|
6
|
+
.. sectionauthor:: Stefan Krah <skrah at bytereef.org>
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
Alignment and packing
|
|
10
|
+
=====================
|
|
11
|
+
|
|
12
|
+
The xnd memory allocators support explicit alignment. Alignment is specified
|
|
13
|
+
in the types.
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
Tuples and records
|
|
17
|
+
------------------
|
|
18
|
+
|
|
19
|
+
Tuples and records have the *align* and *pack* keywords that have the same
|
|
20
|
+
purpose as gcc's *aligned* and *packed* struct attributes.
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
Field alignment
|
|
24
|
+
~~~~~~~~~~~~~~~
|
|
25
|
+
|
|
26
|
+
The *align* keyword can be used to specify an alignment that is greater
|
|
27
|
+
than the natural alignment of a field:
|
|
28
|
+
|
|
29
|
+
.. doctest::
|
|
30
|
+
|
|
31
|
+
>>> from xnd import *
|
|
32
|
+
>>> s = "(uint8, uint64 |align=32|, uint64)"
|
|
33
|
+
>>> x = xnd.empty(s)
|
|
34
|
+
>>> x.align
|
|
35
|
+
32
|
|
36
|
+
>>> x.type.datasize
|
|
37
|
+
64
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
Field packing
|
|
42
|
+
~~~~~~~~~~~~~
|
|
43
|
+
|
|
44
|
+
The *pack* keyword can be used to specify an alignment that is smaller
|
|
45
|
+
than the natural alignment of a field:
|
|
46
|
+
|
|
47
|
+
.. doctest::
|
|
48
|
+
|
|
49
|
+
>>> s = "(uint8, uint64 |pack=2|, uint64)"
|
|
50
|
+
>>> x = xnd.empty(s)
|
|
51
|
+
>>> x.align
|
|
52
|
+
8
|
|
53
|
+
>>> x.type.datasize
|
|
54
|
+
24
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
Struct packing
|
|
59
|
+
~~~~~~~~~~~~~~
|
|
60
|
+
|
|
61
|
+
The *pack* and *align* keywords can be applied to the entire struct:
|
|
62
|
+
|
|
63
|
+
.. doctest::
|
|
64
|
+
|
|
65
|
+
>>> s = "(uint8, uint64, uint64, pack=1)"
|
|
66
|
+
>>> x = xnd.empty(s)
|
|
67
|
+
>>> x.align
|
|
68
|
+
1
|
|
69
|
+
>>> x.type.datasize
|
|
70
|
+
17
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
Individual field and struct directives are mutually exclusive:
|
|
74
|
+
|
|
75
|
+
.. doctest::
|
|
76
|
+
|
|
77
|
+
>>> s = "2 * (uint8 |align=16|, uint64, pack=1)"
|
|
78
|
+
>>> x = xnd.empty(s)
|
|
79
|
+
Traceback (most recent call last):
|
|
80
|
+
File "<stdin>", line 1, in <module>
|
|
81
|
+
ValueError: cannot have 'pack' tuple attribute and field attributes
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
Array alignment
|
|
85
|
+
~~~~~~~~~~~~~~~
|
|
86
|
+
|
|
87
|
+
An array has the same alignment as its elements:
|
|
88
|
+
|
|
89
|
+
.. doctest::
|
|
90
|
+
|
|
91
|
+
>>> s = "2 * (uint8, uint64, pack=1)"
|
|
92
|
+
>>> x = xnd.empty(s)
|
|
93
|
+
>>> x.align
|
|
94
|
+
1
|
|
95
|
+
>>> x.type.datasize
|
|
96
|
+
18
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
.. meta::
|
|
2
|
+
:robots: index,follow
|
|
3
|
+
:description: xnd container
|
|
4
|
+
:keywords: xnd, buffer protocol
|
|
5
|
+
|
|
6
|
+
.. sectionauthor:: Stefan Krah <skrah at bytereef.org>
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
Buffer protocol
|
|
10
|
+
===============
|
|
11
|
+
|
|
12
|
+
xnd supports importing PEP-3118 buffers.
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
From NumPy
|
|
16
|
+
----------
|
|
17
|
+
|
|
18
|
+
Import a simple ndarray:
|
|
19
|
+
|
|
20
|
+
.. doctest::
|
|
21
|
+
|
|
22
|
+
>>> import numpy as np
|
|
23
|
+
>>> from xnd import *
|
|
24
|
+
>>> x = np.array([[[0,1,2], [3,4,5]], [[6,7,8], [9,10,11]]])
|
|
25
|
+
>>> y = xnd.from_buffer(x)
|
|
26
|
+
>>> y.type
|
|
27
|
+
ndt("2 * 2 * 3 * int64")
|
|
28
|
+
>>> y.value
|
|
29
|
+
[[[0, 1, 2], [3, 4, 5]], [[6, 7, 8], [9, 10, 11]]]
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
Import an ndarray with a struct dtype:
|
|
33
|
+
|
|
34
|
+
.. doctest::
|
|
35
|
+
|
|
36
|
+
>>> x = np.array([(1000, 400.25, 'abc'), (-23, -1e10, 'cba')],
|
|
37
|
+
... dtype=[('x', '<i4'), ('y', '>f4'), ('z', 'S3')])
|
|
38
|
+
>>> y = xnd.from_buffer(x)
|
|
39
|
+
>>> y.type
|
|
40
|
+
ndt("2 * {x : int32, y : >float32, z : fixed_bytes(size=3)}")
|
|
41
|
+
>>> y.value
|
|
42
|
+
[{'x': 1000, 'y': 400.25, 'z': b'abc'}, {'x': -23, 'y': -10000000000.0, 'z': b'cba'}]
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
.. meta::
|
|
2
|
+
:robots: index, follow
|
|
3
|
+
:description: xnd documentation
|
|
4
|
+
:keywords: memory blocks, unboxed values, array computing, Python
|
|
5
|
+
|
|
6
|
+
.. sectionauthor:: Stefan Krah <skrah at bytereef.org>
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
xnd
|
|
10
|
+
---
|
|
11
|
+
|
|
12
|
+
The xnd module implements a container type that maps most Python values
|
|
13
|
+
relevant for scientific computing directly to typed memory.
|
|
14
|
+
|
|
15
|
+
Whenever possible, a single, pointer-free memory block is used.
|
|
16
|
+
|
|
17
|
+
xnd supports ragged arrays, categorical types, indexing, slicing, aligned memory blocks and type inference.
|
|
18
|
+
|
|
19
|
+
Operations like indexing and slicing return zero-copy typed views on the data.
|
|
20
|
+
|
|
21
|
+
Importing PEP-3118 buffers is supported.
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
.. toctree::
|
|
25
|
+
:maxdepth: 1
|
|
26
|
+
|
|
27
|
+
types.rst
|
|
28
|
+
align-pack.rst
|
|
29
|
+
buffer-protocol.rst
|
|
30
|
+
quickstart.rst
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
.. meta::
|
|
2
|
+
:robots: index,follow
|
|
3
|
+
:description: xnd quickstart
|
|
4
|
+
:keywords: xnd, install
|
|
5
|
+
|
|
6
|
+
.. sectionauthor:: Stefan Krah <skrah at bytereef.org>
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
Quick Start
|
|
10
|
+
===========
|
|
11
|
+
|
|
12
|
+
Prerequisites
|
|
13
|
+
~~~~~~~~~~~~~
|
|
14
|
+
|
|
15
|
+
Python2 is not supported. If not already present, install the Python3
|
|
16
|
+
development packages:
|
|
17
|
+
|
|
18
|
+
.. code-block:: sh
|
|
19
|
+
|
|
20
|
+
# Debian, Ubuntu:
|
|
21
|
+
sudo apt-get install gcc make
|
|
22
|
+
sudo apt-get install python3-dev
|
|
23
|
+
|
|
24
|
+
# Fedora, RedHat:
|
|
25
|
+
sudo yum install gcc make
|
|
26
|
+
sudo yum install python3-devel
|
|
27
|
+
|
|
28
|
+
# openSUSE:
|
|
29
|
+
sudo zypper install gcc make
|
|
30
|
+
sudo zypper install python3-devel
|
|
31
|
+
|
|
32
|
+
# BSD:
|
|
33
|
+
# You know what to do.
|
|
34
|
+
|
|
35
|
+
# Mac OS X:
|
|
36
|
+
# Install Xcode and Python 3 headers.
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
Install
|
|
40
|
+
~~~~~~~
|
|
41
|
+
|
|
42
|
+
If `pip <http://pypi.python.org/pypi/pip>`_ is present on the system, installation
|
|
43
|
+
should be as easy as:
|
|
44
|
+
|
|
45
|
+
.. code-block:: sh
|
|
46
|
+
|
|
47
|
+
pip install xnd
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
Otherwise:
|
|
51
|
+
|
|
52
|
+
.. code-block:: sh
|
|
53
|
+
|
|
54
|
+
tar xvzf xnd-0.2.0b1.tar.gz
|
|
55
|
+
cd xnd-0.2.0b1
|
|
56
|
+
python3 setup.py install
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
Windows
|
|
60
|
+
~~~~~~~
|
|
61
|
+
|
|
62
|
+
Refer to the instructions in the *vcbuild* directory in the source distribution.
|
|
@@ -0,0 +1,674 @@
|
|
|
1
|
+
.. meta::
|
|
2
|
+
:robots: index,follow
|
|
3
|
+
:description: xnd container
|
|
4
|
+
:keywords: xnd, types, examples
|
|
5
|
+
|
|
6
|
+
.. sectionauthor:: Stefan Krah <skrah at bytereef.org>
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
Types
|
|
10
|
+
=====
|
|
11
|
+
|
|
12
|
+
The xnd object is a container that maps a wide range of Python values directly
|
|
13
|
+
to memory. xnd unpacks complex types of arbitrary nesting depth to a single
|
|
14
|
+
memory block.
|
|
15
|
+
|
|
16
|
+
Pointers only occur in explicit pointer types like *Ref* (reference), *Bytes*
|
|
17
|
+
and *String*, but not in the general case.
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
Type inference
|
|
21
|
+
--------------
|
|
22
|
+
|
|
23
|
+
If no explicit type is given, xnd supports type inference by assuming
|
|
24
|
+
types for the most common Python values.
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
Fixed arrays
|
|
28
|
+
~~~~~~~~~~~~
|
|
29
|
+
|
|
30
|
+
.. doctest::
|
|
31
|
+
|
|
32
|
+
>>> from xnd import *
|
|
33
|
+
>>> x = xnd([[0, 1, 2], [3, 4, 5]])
|
|
34
|
+
>>> x
|
|
35
|
+
xnd([[0, 1, 2], [3, 4, 5]], type='2 * 3 * int64')
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
As expected, lists are mapped to ndarrays and integers to int64. Indexing and
|
|
39
|
+
slicing work the usual way. For performance reasons these operations return
|
|
40
|
+
zero-copy views whenever possible:
|
|
41
|
+
|
|
42
|
+
.. doctest::
|
|
43
|
+
|
|
44
|
+
>>> x[0][1] # Indexing returns views, even for scalars.
|
|
45
|
+
xnd(1, type='int64')
|
|
46
|
+
>>>
|
|
47
|
+
>>> y = x[:, ::-1] # Containers are returned as views.
|
|
48
|
+
>>> y
|
|
49
|
+
xnd([[2, 1, 0], [5, 4, 3]], type='2 * 3 * int64')
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
Subarrays are views and properly typed:
|
|
53
|
+
|
|
54
|
+
.. doctest::
|
|
55
|
+
|
|
56
|
+
>>> x[1]
|
|
57
|
+
xnd([3, 4, 5], type='3 * int64')
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
The representation of large values is abbreviated:
|
|
61
|
+
|
|
62
|
+
.. doctest::
|
|
63
|
+
|
|
64
|
+
>>> x = xnd(10 * [200 * [1]])
|
|
65
|
+
>>> x
|
|
66
|
+
xnd([[1, 1, 1, 1, 1, 1, 1, 1, 1, ...],
|
|
67
|
+
[1, 1, 1, 1, 1, 1, 1, 1, 1, ...],
|
|
68
|
+
[1, 1, 1, 1, 1, 1, 1, 1, 1, ...],
|
|
69
|
+
[1, 1, 1, 1, 1, 1, 1, 1, 1, ...],
|
|
70
|
+
[1, 1, 1, 1, 1, 1, 1, 1, 1, ...],
|
|
71
|
+
[1, 1, 1, 1, 1, 1, 1, 1, 1, ...],
|
|
72
|
+
[1, 1, 1, 1, 1, 1, 1, 1, 1, ...],
|
|
73
|
+
[1, 1, 1, 1, 1, 1, 1, 1, 1, ...],
|
|
74
|
+
[1, 1, 1, 1, 1, 1, 1, 1, 1, ...],
|
|
75
|
+
...],
|
|
76
|
+
type='10 * 200 * int64')
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
Values can be accessed in full using the *value* property:
|
|
80
|
+
|
|
81
|
+
.. doctest::
|
|
82
|
+
|
|
83
|
+
>>> x = xnd(11 * [1])
|
|
84
|
+
>>> x
|
|
85
|
+
xnd([1, 1, 1, 1, 1, 1, 1, 1, 1, ...], type='11 * int64')
|
|
86
|
+
>>> x.value
|
|
87
|
+
[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
Types can be accessed using the *type* property:
|
|
91
|
+
|
|
92
|
+
.. doctest::
|
|
93
|
+
|
|
94
|
+
>>> x = xnd(11 * [1])
|
|
95
|
+
>>> x.type
|
|
96
|
+
ndt("11 * int64")
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
Ragged arrays
|
|
100
|
+
~~~~~~~~~~~~~
|
|
101
|
+
|
|
102
|
+
Ragged arrays are compatible with the Arrow list representation. The data
|
|
103
|
+
is pointer-free; elements are addressed by means of one offset array
|
|
104
|
+
per dimension.
|
|
105
|
+
|
|
106
|
+
.. doctest::
|
|
107
|
+
|
|
108
|
+
>>> xnd([[0.1j], [3+2j, 4+5j, 10j]])
|
|
109
|
+
xnd([[0.1j], [(3+2j), (4+5j), 10j]], type='var * var * complex128')
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
Indexing and slicing work as usual, returning properly typed views or
|
|
113
|
+
values in the case of scalars:
|
|
114
|
+
|
|
115
|
+
.. doctest::
|
|
116
|
+
|
|
117
|
+
>>> x = xnd([[0.1j], [3+2j, 4+5j, 10j]])
|
|
118
|
+
>>> x[1, 2]
|
|
119
|
+
xnd(10j, type='complex128')
|
|
120
|
+
|
|
121
|
+
>>> x[1]
|
|
122
|
+
xnd([(3+2j), (4+5j), 10j], type='var * complex128')
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
Eliminating dimensions through mixed slicing and indexing is not supported
|
|
126
|
+
because it would require copying and adjusting potentially huge offset arrays:
|
|
127
|
+
|
|
128
|
+
.. doctest::
|
|
129
|
+
|
|
130
|
+
>>> y = x[:, 1]
|
|
131
|
+
Traceback (most recent call last):
|
|
132
|
+
File "<stdin>", line 1, in <module>
|
|
133
|
+
IndexError: mixed indexing and slicing is not supported for var dimensions
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
Records (structs)
|
|
137
|
+
~~~~~~~~~~~~~~~~~
|
|
138
|
+
|
|
139
|
+
From Python 3.6 on, dicts retain their order, so they can be used directly
|
|
140
|
+
for initializing C structs.
|
|
141
|
+
|
|
142
|
+
.. doctest::
|
|
143
|
+
|
|
144
|
+
>>> xnd({'a': 'foo', 'b': 10.2})
|
|
145
|
+
xnd({'a': 'foo', 'b': 10.2}, type='{a : string, b : float64}')
|
|
146
|
+
|
|
147
|
+
|
|
148
|
+
Tuples
|
|
149
|
+
~~~~~~
|
|
150
|
+
|
|
151
|
+
Python tuples are directly translated to the libndtypes tuple type:
|
|
152
|
+
|
|
153
|
+
.. doctest::
|
|
154
|
+
|
|
155
|
+
>>> xnd(('foo', b'bar', [None, 10.0, 20.0]))
|
|
156
|
+
xnd(('foo', b'bar', [None, 10.0, 20.0]), type='(string, bytes, 3 * ?float64)')
|
|
157
|
+
|
|
158
|
+
|
|
159
|
+
Nested arrays in structs
|
|
160
|
+
~~~~~~~~~~~~~~~~~~~~~~~~
|
|
161
|
+
|
|
162
|
+
xnd seamlessly supports nested values of arbitrary depth:
|
|
163
|
+
|
|
164
|
+
.. doctest::
|
|
165
|
+
|
|
166
|
+
>>> lst = [{'name': 'John', 'internet_points': [1, 2, 3]},
|
|
167
|
+
... {'name': 'Jane', 'internet_points': [4, 5, 6]}]
|
|
168
|
+
>>> xnd(lst)
|
|
169
|
+
xnd([{'name': 'John', 'internet_points': [1, 2, 3]}, {'name': 'Jane', 'internet_points': [4, 5, 6]}],
|
|
170
|
+
type='2 * {name : string, internet_points : 3 * int64}')
|
|
171
|
+
|
|
172
|
+
|
|
173
|
+
Optional data (missing values)
|
|
174
|
+
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
175
|
+
|
|
176
|
+
Optional data is currently specified using *None*. It is under debate if
|
|
177
|
+
a separate *NA* singleton object would be more suitable.
|
|
178
|
+
|
|
179
|
+
.. doctest::
|
|
180
|
+
|
|
181
|
+
>>> lst = [0, 1, None, 2, 3, None, 5, 10]
|
|
182
|
+
>>> xnd(lst)
|
|
183
|
+
xnd([0, 1, None, 2, 3, None, 5, 10], type='8 * ?int64')
|
|
184
|
+
|
|
185
|
+
|
|
186
|
+
Categorical data
|
|
187
|
+
~~~~~~~~~~~~~~~~
|
|
188
|
+
|
|
189
|
+
Type inference would be ambiguous, so it cannot work directly. xnd supports
|
|
190
|
+
the *levels* argument that is internally translated to the type.
|
|
191
|
+
|
|
192
|
+
.. doctest::
|
|
193
|
+
|
|
194
|
+
>>> levels = ['January', 'August', 'December', None]
|
|
195
|
+
>>> x = xnd(['January', 'January', None, 'December', 'August', 'December', 'December'], levels=levels)
|
|
196
|
+
>>> x.value
|
|
197
|
+
['January', 'January', None, 'December', 'August', 'December', 'December']
|
|
198
|
+
>>> x.type
|
|
199
|
+
ndt("7 * categorical('January', 'August', 'December', NA)")
|
|
200
|
+
|
|
201
|
+
|
|
202
|
+
The above is equivalent to specifying the type directly:
|
|
203
|
+
|
|
204
|
+
.. doctest::
|
|
205
|
+
|
|
206
|
+
>>> from ndtypes import *
|
|
207
|
+
>>> t = ndt("7 * categorical('January', 'August', 'December', NA)")
|
|
208
|
+
>>> x = xnd(['January', 'January', None, 'December', 'August', 'December', 'December'], type=t)
|
|
209
|
+
>>> x.value
|
|
210
|
+
['January', 'January', None, 'December', 'August', 'December', 'December']
|
|
211
|
+
>>> x.type
|
|
212
|
+
ndt("7 * categorical('January', 'August', 'December', NA)")
|
|
213
|
+
|
|
214
|
+
|
|
215
|
+
Explicit types
|
|
216
|
+
--------------
|
|
217
|
+
|
|
218
|
+
While type inference is well-defined, it necessarily makes assumptions about
|
|
219
|
+
the programmer's intent.
|
|
220
|
+
|
|
221
|
+
There are two cases where types should be given:
|
|
222
|
+
|
|
223
|
+
|
|
224
|
+
Different types are intended
|
|
225
|
+
~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
226
|
+
|
|
227
|
+
.. doctest::
|
|
228
|
+
|
|
229
|
+
>>> xnd([[0,1,2], [3,4,5]], type="2 * 3 * uint8")
|
|
230
|
+
xnd([[0, 1, 2], [3, 4, 5]], type='2 * 3 * uint8')
|
|
231
|
+
|
|
232
|
+
Here, type inference would deduce :c:macro:`int64`, so :c:macro:`uint8` needs
|
|
233
|
+
to be passed explicitly.
|
|
234
|
+
|
|
235
|
+
|
|
236
|
+
Performance
|
|
237
|
+
~~~~~~~~~~~
|
|
238
|
+
|
|
239
|
+
For large arrays, explicit types are significantly faster. Type inference
|
|
240
|
+
supports arbitrary nesting depth, is complex and still implemented in pure
|
|
241
|
+
Python. Compare:
|
|
242
|
+
|
|
243
|
+
.. doctest::
|
|
244
|
+
|
|
245
|
+
>>> lst = [1] * 1000000
|
|
246
|
+
>>> x = xnd(lst) # inference
|
|
247
|
+
>>>
|
|
248
|
+
>>> x = xnd(lst, type='1000000 * int64') # explicit
|
|
249
|
+
|
|
250
|
+
|
|
251
|
+
All supported types
|
|
252
|
+
-------------------
|
|
253
|
+
|
|
254
|
+
Fixed arrays
|
|
255
|
+
~~~~~~~~~~~~
|
|
256
|
+
|
|
257
|
+
Fixed arrays are similar to NumPy's ndarray. One difference is that internally
|
|
258
|
+
xnd uses steps instead of strides. One step is the number of indices required
|
|
259
|
+
to move the linear index from one dimension element to the next.
|
|
260
|
+
|
|
261
|
+
This facilitates optional data, whose bitmaps need to be addressed by the
|
|
262
|
+
linear index. The equation *stride = step * itemsize* always holds.
|
|
263
|
+
|
|
264
|
+
|
|
265
|
+
.. doctest::
|
|
266
|
+
|
|
267
|
+
>>> xnd([[[1,2], [None, 3]], [[4, None], [5, 6]]])
|
|
268
|
+
xnd([[[1, 2], [None, 3]], [[4, None], [5, 6]]], type='2 * 2 * 2 * ?int64')
|
|
269
|
+
|
|
270
|
+
This is a fixed array with optional data.
|
|
271
|
+
|
|
272
|
+
|
|
273
|
+
.. doctest::
|
|
274
|
+
|
|
275
|
+
>>> xnd([(1,2.0,3j), (4,5.0,6j)])
|
|
276
|
+
xnd([(1, 2.0, 3j), (4, 5.0, 6j)], type='2 * (int64, float64, complex128)')
|
|
277
|
+
|
|
278
|
+
An array with tuple elements.
|
|
279
|
+
|
|
280
|
+
|
|
281
|
+
Fortran order
|
|
282
|
+
~~~~~~~~~~~~~
|
|
283
|
+
|
|
284
|
+
Fortran order is specified by prefixing the dimensions with an exclamation mark:
|
|
285
|
+
|
|
286
|
+
.. doctest::
|
|
287
|
+
|
|
288
|
+
>>> lst = [[1, 2, 3], [4, 5, 6]]
|
|
289
|
+
>>> x = xnd(lst, type='!2 * 3 * uint16')
|
|
290
|
+
>>>
|
|
291
|
+
>>> x.type.shape
|
|
292
|
+
(2, 3)
|
|
293
|
+
>>> x.type.strides
|
|
294
|
+
(2, 4)
|
|
295
|
+
|
|
296
|
+
|
|
297
|
+
Alternatively, steps can be passed as arguments to the fixed dimension type:
|
|
298
|
+
|
|
299
|
+
.. doctest::
|
|
300
|
+
|
|
301
|
+
>>> from ndtypes import *
|
|
302
|
+
>>> lst = [[1, 2, 3], [4, 5, 6]]
|
|
303
|
+
>>> t = ndt("fixed(shape=2, step=1) * fixed(shape=3, step=2) * uint16")
|
|
304
|
+
>>> x = xnd(lst, type=t)
|
|
305
|
+
>>> x.type.shape
|
|
306
|
+
(2, 3)
|
|
307
|
+
>>> x.type.strides
|
|
308
|
+
(2, 4)
|
|
309
|
+
|
|
310
|
+
|
|
311
|
+
Ragged arrays
|
|
312
|
+
~~~~~~~~~~~~~
|
|
313
|
+
|
|
314
|
+
Ragged arrays with explicit types are easiest to construct using the *dtype*
|
|
315
|
+
argument to the xnd constructor.
|
|
316
|
+
|
|
317
|
+
.. doctest::
|
|
318
|
+
|
|
319
|
+
>>> lst = [[0], [1, 2], [3, 4, 5]]
|
|
320
|
+
>>> xnd(lst, dtype="int32")
|
|
321
|
+
xnd([[0], [1, 2], [3, 4, 5]], type='var * var * int32')
|
|
322
|
+
|
|
323
|
+
|
|
324
|
+
Alternatively, offsets can be passed as arguments to the var dimension type:
|
|
325
|
+
|
|
326
|
+
.. doctest::
|
|
327
|
+
|
|
328
|
+
>>> from ndtypes import ndt
|
|
329
|
+
>>> t = ndt("var(offsets=[0,3]) * var(offsets=[0,1,3,6]) * int32")
|
|
330
|
+
>>> xnd(lst, type=t)
|
|
331
|
+
xnd([[0], [1, 2], [3, 4, 5]], type='var * var * int32')
|
|
332
|
+
|
|
333
|
+
|
|
334
|
+
Tuples
|
|
335
|
+
~~~~~~
|
|
336
|
+
|
|
337
|
+
In memory, tuples are the same as C structs.
|
|
338
|
+
|
|
339
|
+
.. doctest::
|
|
340
|
+
|
|
341
|
+
>>> xnd(("foo", 1.0))
|
|
342
|
+
xnd(('foo', 1.0), type='(string, float64)')
|
|
343
|
+
|
|
344
|
+
|
|
345
|
+
Indexing works the same as for arrays:
|
|
346
|
+
|
|
347
|
+
.. doctest::
|
|
348
|
+
|
|
349
|
+
>>> x = xnd(("foo", 1.0))
|
|
350
|
+
>>> x[0]
|
|
351
|
+
xnd('foo', type='string')
|
|
352
|
+
|
|
353
|
+
|
|
354
|
+
Nested tuples are more general than ragged arrays: a) they can hold different
|
|
355
|
+
data types and b) the trees they represent may be unbalanced.
|
|
356
|
+
|
|
357
|
+
They do not allow slicing though and are probably less efficient.
|
|
358
|
+
|
|
359
|
+
This is an example of an unbalanced tree that cannot be represented as a
|
|
360
|
+
ragged array:
|
|
361
|
+
|
|
362
|
+
.. doctest::
|
|
363
|
+
|
|
364
|
+
>>> unbalanced_tree = (((1.0, 2.0), (3.0)), 4.0, ((5.0, 6.0, 7.0), ()))
|
|
365
|
+
>>> x = xnd(unbalanced_tree)
|
|
366
|
+
>>> x.value
|
|
367
|
+
(((1.0, 2.0), 3.0), 4.0, ((5.0, 6.0, 7.0), ()))
|
|
368
|
+
>>> x.type
|
|
369
|
+
ndt("(((float64, float64), float64), float64, ((float64, float64, float64), ()))")
|
|
370
|
+
>>>
|
|
371
|
+
>>> x[0]
|
|
372
|
+
xnd(((1.0, 2.0), 3.0), type='((float64, float64), float64)')
|
|
373
|
+
>>> x[0][0]
|
|
374
|
+
xnd((1.0, 2.0), type='(float64, float64)')
|
|
375
|
+
|
|
376
|
+
|
|
377
|
+
Note that the data in the above tree example is packed into a single contiguous
|
|
378
|
+
memory block.
|
|
379
|
+
|
|
380
|
+
|
|
381
|
+
Records
|
|
382
|
+
~~~~~~~
|
|
383
|
+
|
|
384
|
+
In memory, records are C structs. The field names are only stored in the type.
|
|
385
|
+
|
|
386
|
+
The following examples use Python-3.6, which keeps the dict initialization
|
|
387
|
+
order.
|
|
388
|
+
|
|
389
|
+
.. doctest::
|
|
390
|
+
|
|
391
|
+
>>> x = xnd({'a': b'123', 'b': {'x': 1.2, 'y': 100+3j}})
|
|
392
|
+
>>> x.value
|
|
393
|
+
{'a': b'123', 'b': {'x': 1.2, 'y': (100+3j)}}
|
|
394
|
+
>>> x.type
|
|
395
|
+
ndt("{a : bytes, b : {x : float64, y : complex128}}")
|
|
396
|
+
|
|
397
|
+
|
|
398
|
+
Indexing works the same as for arrays. Additionally, fields can be indexed
|
|
399
|
+
by name:
|
|
400
|
+
|
|
401
|
+
.. doctest::
|
|
402
|
+
|
|
403
|
+
>>> x[0]
|
|
404
|
+
xnd(b'123', type='bytes')
|
|
405
|
+
>>> x['a']
|
|
406
|
+
xnd(b'123', type='bytes')
|
|
407
|
+
>>> x['b']
|
|
408
|
+
xnd({'x': 1.2, 'y': (100+3j)}, type='{x : float64, y : complex128}')
|
|
409
|
+
|
|
410
|
+
|
|
411
|
+
The nesting depth is arbitrary. In the following example, the data -- except
|
|
412
|
+
for strings, which are pointers -- is packed into a single contiguous memory
|
|
413
|
+
block:
|
|
414
|
+
|
|
415
|
+
.. code-block:: py
|
|
416
|
+
|
|
417
|
+
>>> from pprint import pprint
|
|
418
|
+
>>> item = {
|
|
419
|
+
... "id": 1001,
|
|
420
|
+
... "name": "cyclotron",
|
|
421
|
+
... "price": 5998321.99,
|
|
422
|
+
... "tags": ["connoisseur", "luxury"],
|
|
423
|
+
... "stock": {
|
|
424
|
+
... "warehouse": 722,
|
|
425
|
+
... "retail": 20
|
|
426
|
+
... }
|
|
427
|
+
... }
|
|
428
|
+
>>> x = xnd(item)
|
|
429
|
+
>>>
|
|
430
|
+
>>> pprint(x.value)
|
|
431
|
+
{'id': 1001,
|
|
432
|
+
'name': 'cyclotron',
|
|
433
|
+
'price': 5998321.99,
|
|
434
|
+
'stock': {'retail': 20, 'warehouse': 722},
|
|
435
|
+
'tags': ['connoisseur', 'luxury']}
|
|
436
|
+
>>>
|
|
437
|
+
>>> x.type.pprint()
|
|
438
|
+
{
|
|
439
|
+
id : int64,
|
|
440
|
+
name : string,
|
|
441
|
+
price : float64,
|
|
442
|
+
tags : 2 * string,
|
|
443
|
+
stock : {
|
|
444
|
+
warehouse : int64,
|
|
445
|
+
retail : int64
|
|
446
|
+
}
|
|
447
|
+
}
|
|
448
|
+
|
|
449
|
+
|
|
450
|
+
Strings can be embedded into the array by specifying the fixed string type.
|
|
451
|
+
In this case, the memory block is pointer-free.
|
|
452
|
+
|
|
453
|
+
.. code-block:: py
|
|
454
|
+
|
|
455
|
+
>>> from ndtypes import ndt
|
|
456
|
+
>>>
|
|
457
|
+
>>> t = """
|
|
458
|
+
... { id : int64,
|
|
459
|
+
... name : fixed_string(30),
|
|
460
|
+
... price : float64,
|
|
461
|
+
... tags : 2 * fixed_string(30),
|
|
462
|
+
... stock : {warehouse : int64, retail : int64}
|
|
463
|
+
... }
|
|
464
|
+
... """
|
|
465
|
+
>>>
|
|
466
|
+
>>> x = xnd(item, type=t)
|
|
467
|
+
>>> x.type.pprint()
|
|
468
|
+
{
|
|
469
|
+
id : int64,
|
|
470
|
+
name : fixed_string(30),
|
|
471
|
+
price : float64,
|
|
472
|
+
tags : 2 * fixed_string(30),
|
|
473
|
+
stock : {
|
|
474
|
+
warehouse : int64,
|
|
475
|
+
retail : int64
|
|
476
|
+
}
|
|
477
|
+
}
|
|
478
|
+
|
|
479
|
+
|
|
480
|
+
Record of arrays
|
|
481
|
+
~~~~~~~~~~~~~~~~
|
|
482
|
+
|
|
483
|
+
Often it is more memory efficient to store an array of records as a record of
|
|
484
|
+
arrays. This example with columnar data is from the Arrow homepage:
|
|
485
|
+
|
|
486
|
+
.. doctest::
|
|
487
|
+
|
|
488
|
+
>>> data = {'session_id': [1331247700, 1331247702, 1331247709, 1331247799],
|
|
489
|
+
... 'timestamp': [1515529735.4895875, 1515529746.2128427, 1515529756.4485607, 1515529766.2181058],
|
|
490
|
+
... 'source_ip': ['8.8.8.100', '100.2.0.11', '99.101.22.222', '12.100.111.200']}
|
|
491
|
+
>>> x = xnd(data)
|
|
492
|
+
>>> x.type
|
|
493
|
+
ndt("{session_id : 4 * int64, timestamp : 4 * float64, source_ip : 4 * string}")
|
|
494
|
+
|
|
495
|
+
|
|
496
|
+
|
|
497
|
+
References
|
|
498
|
+
~~~~~~~~~~
|
|
499
|
+
|
|
500
|
+
References are transparent pointers to new memory blocks (meaning a new
|
|
501
|
+
data pointer, not a whole new xnd buffer).
|
|
502
|
+
|
|
503
|
+
For example, this is an array of pointers to arrays:
|
|
504
|
+
|
|
505
|
+
.. doctest::
|
|
506
|
+
|
|
507
|
+
>>> t = ndt("3 * ref(4 * uint64)")
|
|
508
|
+
>>> lst = [[0,1,2,3], [4,5,6,7], [8,9,10,11]]
|
|
509
|
+
>>> xnd(lst, type=t)
|
|
510
|
+
xnd([[0, 1, 2, 3], [4, 5, 6, 7], [8, 9, 10, 11]], type='3 * ref(4 * uint64)')
|
|
511
|
+
|
|
512
|
+
The user sees no difference from a regular 3 by 4 array, but internally
|
|
513
|
+
the outer dimension consists of three pointers to the inner arrays.
|
|
514
|
+
|
|
515
|
+
For memory blocks generated by xnd itself the feature is not so useful --
|
|
516
|
+
after all, it is usually better to have a single memory block than one
|
|
517
|
+
with additional pointers.
|
|
518
|
+
|
|
519
|
+
|
|
520
|
+
However, suppose that in the above columnar data example another application
|
|
521
|
+
represents the arrays inside the record with pointers. Using the *ref* type,
|
|
522
|
+
data structures borrowed from such an application can be properly typed:
|
|
523
|
+
|
|
524
|
+
.. doctest::
|
|
525
|
+
|
|
526
|
+
>>> t = ndt("{session_id : &4 * int64, timestamp : &4 * float64, source_ip : &4 * string}")
|
|
527
|
+
>>> x = xnd(data, type=t)
|
|
528
|
+
>>> x.type
|
|
529
|
+
ndt("{session_id : ref(4 * int64), timestamp : ref(4 * float64), source_ip : ref(4 * string)}")
|
|
530
|
+
|
|
531
|
+
The ampersand is shorthand for "ref".
|
|
532
|
+
|
|
533
|
+
|
|
534
|
+
|
|
535
|
+
Constructors
|
|
536
|
+
~~~~~~~~~~~~
|
|
537
|
+
|
|
538
|
+
Constructors are xnd's way of creating distinct named types. The constructor
|
|
539
|
+
argument is a regular type.
|
|
540
|
+
|
|
541
|
+
Constructors open up a new dtype, so named arrays can be the dtype of
|
|
542
|
+
other arrays. Type inference currently isn't aware of constructors,
|
|
543
|
+
so types have to be provided.
|
|
544
|
+
|
|
545
|
+
.. doctest::
|
|
546
|
+
|
|
547
|
+
>>> t = ndt("3 * SomeMatrix(2 * 2 * float32)")
|
|
548
|
+
>>> lst = [[[1,2], [3,4]], [[5,6], [7,8]], [[9,10], [11,12]]]
|
|
549
|
+
>>> x = xnd(lst, type=t)
|
|
550
|
+
>>> x
|
|
551
|
+
xnd([[[1.0, 2.0], [3.0, 4.0]], [[5.0, 6.0], [7.0, 8.0]], [[9.0, 10.0], [11.0, 12.0]]],
|
|
552
|
+
type='3 * SomeMatrix(2 * 2 * float32)')
|
|
553
|
+
>>> x[0]
|
|
554
|
+
xnd([[1.0, 2.0], [3.0, 4.0]], type='SomeMatrix(2 * 2 * float32)')
|
|
555
|
+
|
|
556
|
+
|
|
557
|
+
Categorical
|
|
558
|
+
~~~~~~~~~~~
|
|
559
|
+
|
|
560
|
+
Categorical types contain values. The data stored in xnd buffers are indices
|
|
561
|
+
(:c:macro:`int64`) into the type's categories.
|
|
562
|
+
|
|
563
|
+
.. doctest::
|
|
564
|
+
|
|
565
|
+
>>> t = ndt("categorical('a', 'b', 'c', NA)")
|
|
566
|
+
>>> data = ['a', 'a', 'b', 'a', 'a', 'a', 'foo', 'c']
|
|
567
|
+
>>> x = xnd(data, dtype=t)
|
|
568
|
+
>>> x.value
|
|
569
|
+
['a', 'a', 'b', 'a', 'a', 'a', None, 'c']
|
|
570
|
+
|
|
571
|
+
|
|
572
|
+
Fixed String
|
|
573
|
+
~~~~~~~~~~~~
|
|
574
|
+
|
|
575
|
+
Fixed strings are embedded into arrays. Supported encodings are 'ascii',
|
|
576
|
+
'utf8', 'utf16' and 'utf32'. The string size argument denotes the number
|
|
577
|
+
of code points rather than bytes.
|
|
578
|
+
|
|
579
|
+
.. doctest::
|
|
580
|
+
|
|
581
|
+
>>> t = ndt("10 * fixed_string(3, 'utf32')")
|
|
582
|
+
>>> x = xnd.empty(t)
|
|
583
|
+
>>> x.value
|
|
584
|
+
['', '', '', '', '', '', '', '', '', '']
|
|
585
|
+
>>> x[3] = "\U000003B1\U000003B2\U000003B3"
|
|
586
|
+
>>> x.value
|
|
587
|
+
['', '', '', 'αβγ', '', '', '', '', '', '']
|
|
588
|
+
|
|
589
|
+
|
|
590
|
+
Fixed Bytes
|
|
591
|
+
~~~~~~~~~~~
|
|
592
|
+
|
|
593
|
+
Fixed bytes are embedded into arrays.
|
|
594
|
+
|
|
595
|
+
.. doctest::
|
|
596
|
+
|
|
597
|
+
>>> t = ndt("3 * fixed_bytes(size=3)")
|
|
598
|
+
>>> x = xnd.empty(t)
|
|
599
|
+
>>> x[2] = b'123'
|
|
600
|
+
>>> x.value
|
|
601
|
+
[b'\x00\x00\x00', b'\x00\x00\x00', b'123']
|
|
602
|
+
>>> x.align
|
|
603
|
+
1
|
|
604
|
+
|
|
605
|
+
Alignment can be requested, with the requirement that the size is a multiple of the
|
|
606
|
+
alignment:
|
|
607
|
+
|
|
608
|
+
.. doctest::
|
|
609
|
+
|
|
610
|
+
>>> t = ndt("3 * fixed_bytes(size=32, align=16)")
|
|
611
|
+
>>> x = xnd.empty(t)
|
|
612
|
+
>>> x.align
|
|
613
|
+
16
|
|
614
|
+
|
|
615
|
+
|
|
616
|
+
String
|
|
617
|
+
~~~~~~
|
|
618
|
+
|
|
619
|
+
Strings are pointers to :c:macro:`NUL`-terminated UTF-8 strings.
|
|
620
|
+
|
|
621
|
+
.. doctest::
|
|
622
|
+
|
|
623
|
+
>>> x = xnd.empty("10 * string")
|
|
624
|
+
>>> x.value
|
|
625
|
+
['', '', '', '', '', '', '', '', '', '']
|
|
626
|
+
>>> x[0] = "abc"
|
|
627
|
+
>>> x.value
|
|
628
|
+
['abc', '', '', '', '', '', '', '', '', '']
|
|
629
|
+
|
|
630
|
+
|
|
631
|
+
|
|
632
|
+
Bytes
|
|
633
|
+
~~~~~
|
|
634
|
+
|
|
635
|
+
Internally, bytes are structs with a size field and a pointer to the data.
|
|
636
|
+
|
|
637
|
+
.. doctest::
|
|
638
|
+
|
|
639
|
+
>>> xnd([b'123', b'45678'])
|
|
640
|
+
xnd([b'123', b'45678'], type='2 * bytes')
|
|
641
|
+
|
|
642
|
+
|
|
643
|
+
The bytes constructor takes an optional *align* argument that specifies the
|
|
644
|
+
alignment of the allocated data:
|
|
645
|
+
|
|
646
|
+
.. doctest::
|
|
647
|
+
|
|
648
|
+
>>> x = xnd([b'abc', b'123'], type="2 * bytes(align=64)")
|
|
649
|
+
>>> x.value
|
|
650
|
+
[b'abc', b'123']
|
|
651
|
+
>>> x.align
|
|
652
|
+
8
|
|
653
|
+
|
|
654
|
+
Note that *x.align* is the alignment of the array. The embedded pointers
|
|
655
|
+
to the bytes data are aligned at *64*.
|
|
656
|
+
|
|
657
|
+
|
|
658
|
+
Primitive types
|
|
659
|
+
~~~~~~~~~~~~~~~
|
|
660
|
+
|
|
661
|
+
As a short example, here is a tuple that contains all primitive types:
|
|
662
|
+
|
|
663
|
+
.. doctest::
|
|
664
|
+
|
|
665
|
+
>>> s = """
|
|
666
|
+
... (bool,
|
|
667
|
+
... int8, int16, int32, int64,
|
|
668
|
+
... uint8, uint16, uint32, uint64,
|
|
669
|
+
... float16, float32, float64,
|
|
670
|
+
... complex32, complex64, complex128)
|
|
671
|
+
... """
|
|
672
|
+
>>> x = xnd.empty(s)
|
|
673
|
+
>>> x.value
|
|
674
|
+
(False, 0, 0, 0, 0, 0, 0, 0, 0, 0.0, 0.0, 0.0, 0j, 0j, 0j)
|