xnd 0.2.0dev3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (93) hide show
  1. checksums.yaml +7 -0
  2. data/CONTRIBUTING.md +42 -0
  3. data/Gemfile +3 -0
  4. data/History.md +0 -0
  5. data/README.md +7 -0
  6. data/Rakefile +135 -0
  7. data/ext/ruby_xnd/extconf.rb +70 -0
  8. data/ext/ruby_xnd/float_pack_unpack.c +277 -0
  9. data/ext/ruby_xnd/float_pack_unpack.h +39 -0
  10. data/ext/ruby_xnd/gc_guard.c +36 -0
  11. data/ext/ruby_xnd/gc_guard.h +12 -0
  12. data/ext/ruby_xnd/include/xnd.h +449 -0
  13. data/ext/ruby_xnd/lib/libxnd.a +0 -0
  14. data/ext/ruby_xnd/lib/libxnd.so +1 -0
  15. data/ext/ruby_xnd/lib/libxnd.so.0 +1 -0
  16. data/ext/ruby_xnd/lib/libxnd.so.0.2.0dev3 +0 -0
  17. data/ext/ruby_xnd/memory_block_object.c +32 -0
  18. data/ext/ruby_xnd/memory_block_object.h +33 -0
  19. data/ext/ruby_xnd/ruby_xnd.c +1953 -0
  20. data/ext/ruby_xnd/ruby_xnd.h +61 -0
  21. data/ext/ruby_xnd/ruby_xnd_internal.h +85 -0
  22. data/ext/ruby_xnd/util.h +170 -0
  23. data/ext/ruby_xnd/xnd/AUTHORS.txt +5 -0
  24. data/ext/ruby_xnd/xnd/INSTALL.txt +134 -0
  25. data/ext/ruby_xnd/xnd/LICENSE.txt +29 -0
  26. data/ext/ruby_xnd/xnd/MANIFEST.in +3 -0
  27. data/ext/ruby_xnd/xnd/Makefile.in +80 -0
  28. data/ext/ruby_xnd/xnd/README.rst +44 -0
  29. data/ext/ruby_xnd/xnd/config.guess +1530 -0
  30. data/ext/ruby_xnd/xnd/config.h.in +22 -0
  31. data/ext/ruby_xnd/xnd/config.sub +1782 -0
  32. data/ext/ruby_xnd/xnd/configure +4867 -0
  33. data/ext/ruby_xnd/xnd/configure.ac +164 -0
  34. data/ext/ruby_xnd/xnd/doc/Makefile +14 -0
  35. data/ext/ruby_xnd/xnd/doc/_static/copybutton.js +66 -0
  36. data/ext/ruby_xnd/xnd/doc/conf.py +26 -0
  37. data/ext/ruby_xnd/xnd/doc/index.rst +44 -0
  38. data/ext/ruby_xnd/xnd/doc/libxnd/data-structures.rst +186 -0
  39. data/ext/ruby_xnd/xnd/doc/libxnd/functions.rst +148 -0
  40. data/ext/ruby_xnd/xnd/doc/libxnd/index.rst +25 -0
  41. data/ext/ruby_xnd/xnd/doc/releases/index.rst +34 -0
  42. data/ext/ruby_xnd/xnd/doc/xnd/align-pack.rst +96 -0
  43. data/ext/ruby_xnd/xnd/doc/xnd/buffer-protocol.rst +42 -0
  44. data/ext/ruby_xnd/xnd/doc/xnd/index.rst +30 -0
  45. data/ext/ruby_xnd/xnd/doc/xnd/quickstart.rst +62 -0
  46. data/ext/ruby_xnd/xnd/doc/xnd/types.rst +674 -0
  47. data/ext/ruby_xnd/xnd/install-sh +527 -0
  48. data/ext/ruby_xnd/xnd/libxnd/Makefile.in +102 -0
  49. data/ext/ruby_xnd/xnd/libxnd/Makefile.vc +112 -0
  50. data/ext/ruby_xnd/xnd/libxnd/bitmaps.c +345 -0
  51. data/ext/ruby_xnd/xnd/libxnd/contrib.h +313 -0
  52. data/ext/ruby_xnd/xnd/libxnd/copy.c +944 -0
  53. data/ext/ruby_xnd/xnd/libxnd/equal.c +1216 -0
  54. data/ext/ruby_xnd/xnd/libxnd/inline.h +154 -0
  55. data/ext/ruby_xnd/xnd/libxnd/overflow.h +147 -0
  56. data/ext/ruby_xnd/xnd/libxnd/split.c +286 -0
  57. data/ext/ruby_xnd/xnd/libxnd/tests/Makefile.in +39 -0
  58. data/ext/ruby_xnd/xnd/libxnd/tests/Makefile.vc +44 -0
  59. data/ext/ruby_xnd/xnd/libxnd/tests/README.txt +2 -0
  60. data/ext/ruby_xnd/xnd/libxnd/tests/runtest.c +101 -0
  61. data/ext/ruby_xnd/xnd/libxnd/tests/test.h +48 -0
  62. data/ext/ruby_xnd/xnd/libxnd/tests/test_fixed.c +108 -0
  63. data/ext/ruby_xnd/xnd/libxnd/xnd.c +1304 -0
  64. data/ext/ruby_xnd/xnd/libxnd/xnd.h +449 -0
  65. data/ext/ruby_xnd/xnd/python/test_xnd.py +3144 -0
  66. data/ext/ruby_xnd/xnd/python/xnd/__init__.py +290 -0
  67. data/ext/ruby_xnd/xnd/python/xnd/_xnd.c +2822 -0
  68. data/ext/ruby_xnd/xnd/python/xnd/contrib/pretty.py +850 -0
  69. data/ext/ruby_xnd/xnd/python/xnd/docstrings.h +129 -0
  70. data/ext/ruby_xnd/xnd/python/xnd/pyxnd.h +200 -0
  71. data/ext/ruby_xnd/xnd/python/xnd/util.h +182 -0
  72. data/ext/ruby_xnd/xnd/python/xnd_randvalue.py +1121 -0
  73. data/ext/ruby_xnd/xnd/python/xnd_support.py +106 -0
  74. data/ext/ruby_xnd/xnd/setup.py +303 -0
  75. data/ext/ruby_xnd/xnd/vcbuild/INSTALL.txt +42 -0
  76. data/ext/ruby_xnd/xnd/vcbuild/runtest32.bat +16 -0
  77. data/ext/ruby_xnd/xnd/vcbuild/runtest64.bat +14 -0
  78. data/ext/ruby_xnd/xnd/vcbuild/vcbuild32.bat +29 -0
  79. data/ext/ruby_xnd/xnd/vcbuild/vcbuild64.bat +29 -0
  80. data/ext/ruby_xnd/xnd/vcbuild/vcclean.bat +13 -0
  81. data/ext/ruby_xnd/xnd/vcbuild/vcdistclean.bat +14 -0
  82. data/lib/ruby_xnd.so +0 -0
  83. data/lib/xnd.rb +306 -0
  84. data/lib/xnd/monkeys.rb +29 -0
  85. data/lib/xnd/version.rb +6 -0
  86. data/spec/debug_spec.rb +9 -0
  87. data/spec/gc_guard_spec.rb +10 -0
  88. data/spec/leakcheck.rb +9 -0
  89. data/spec/spec_helper.rb +877 -0
  90. data/spec/type_inference_spec.rb +81 -0
  91. data/spec/xnd_spec.rb +2921 -0
  92. data/xnd.gemspec +47 -0
  93. metadata +215 -0
@@ -0,0 +1,148 @@
1
+ .. meta::
2
+ :robots: index,follow
3
+ :description: libxnd documentation
4
+
5
+ .. sectionauthor:: Stefan Krah <skrah at bytereef.org>
6
+
7
+
8
+ Functions
9
+ =========
10
+
11
+ Create typed memory blocks
12
+ --------------------------
13
+
14
+ The main use case for libxnd is to create and manage typed memory blocks.
15
+ These blocks are fully initialized to *0*. References to additional memory
16
+ blocks are allocated and initialized recursively.
17
+
18
+ *bytes* and *string* types are initialized to :c:macro:`NULL`, since their
19
+ actual length is not known yet.
20
+
21
+
22
+ .. topic:: xnd_empty_from_string
23
+
24
+ .. code-block:: c
25
+
26
+ xnd_master_t *xnd_empty_from_string(const char *s, uint32_t flags, ndt_context_t *ctx);
27
+
28
+ Return a new master buffer according to the type string in *s*. *flags*
29
+ must include :c:macro:`XND_OWN_TYPE`.
30
+
31
+
32
+ .. topic:: xnd_empty_from_type
33
+
34
+ .. code-block:: c
35
+
36
+ xnd_master_t *xnd_empty_from_type(const ndt_t *t, uint32_t flags, ndt_context_t *ctx);
37
+
38
+
39
+ Return a new master buffer according to the type *t*. *flags* must not include
40
+ :c:macro:`XND_OWN_TYPE`, i.e. the type is externally managed.
41
+
42
+ This is the case in the Python bindings, where the ndtypes module creates
43
+ and manages types.
44
+
45
+
46
+ Delete typed memory blocks
47
+ --------------------------
48
+
49
+ .. topic:: xnd_del
50
+
51
+ .. code-block:: c
52
+
53
+ void xnd_del(xnd_master_t *x);
54
+
55
+ Delete the master buffer according to its flags. *x* may be :c:macro:`NULL`.
56
+ *x->master.ptr* and *x->master.type* may be :c:macro:`NULL`.
57
+
58
+ The latter situation should only arise when breaking up reference cycles.
59
+ This is used in the Python module.
60
+
61
+
62
+ Bitmaps
63
+ -------
64
+
65
+ .. topic:: xnd_bitmap_next
66
+
67
+ .. code-block:: c
68
+
69
+ xnd_bitmap_t xnd_bitmap_next(const xnd_t *x, int64_t i, ndt_context_t *ctx);
70
+
71
+ Get the next bitmap for the *Tuple*, *Record*, *Ref* and *Constr* types.
72
+
73
+ This is a convenience function that checks if the types have optional
74
+ subtrees.
75
+
76
+ If yes, return the bitmap at index *i*. If not, return an empty bitmap
77
+ that must not be accessed.
78
+
79
+
80
+ .. topic:: xnd_set_valid
81
+
82
+ .. code-block:: c
83
+
84
+ void xnd_set_valid(xnd_t *x);
85
+
86
+ Set the validity bit at *x->index*. *x* must have an optional type.
87
+
88
+
89
+ .. topic:: xnd_set_na
90
+
91
+ .. code-block:: c
92
+
93
+ void xnd_set_na(xnd_t *x);
94
+
95
+ Clear the validity bit at *x->index*. *x* must have an optional type.
96
+
97
+
98
+ .. topic:: xnd_is_valid
99
+
100
+ .. code-block:: c
101
+
102
+ int xnd_is_valid(const xnd_t *x);
103
+
104
+ Check if the element at *x->index* is valid. If *x* does not have an optional
105
+ type, return *1*. Otherwise, return the validity bit (zero or nonzero).
106
+
107
+
108
+ .. topic:: xnd_is_na
109
+
110
+ .. code-block:: c
111
+
112
+ int xnd_is_na(const xnd_t *x);
113
+
114
+ Check if the element at *x->index* is missing (NA). If *x* does not have an optional
115
+ type, return *0*. Otherwise, return the negation of the validity bit.
116
+
117
+
118
+ .. topic:: xnd_subtree
119
+
120
+ .. code-block:: c
121
+
122
+ xnd_t xnd_subtree(const xnd_t *x, const xnd_index_t indices[], int len,
123
+ ndt_context_t *ctx);
124
+
125
+ Apply zero or more indices to the input *x* and return a typed view. Valid
126
+ indices are integers or strings for record fields.
127
+
128
+ This function is more general than pure array indexing, hence the name. For
129
+ example, it is possible to index into nested records that in turn contain
130
+ arrays.
131
+
132
+
133
+ .. topic:: xnd_multikey
134
+
135
+ .. code-block:: c
136
+
137
+ xnd_t xnd_multikey(const xnd_t *x, const xnd_index_t indices[], int len,
138
+ ndt_context_t *ctx);
139
+
140
+ Apply zero or more keys to the input *x* and return a typed view. Valid
141
+ keys are integers or slices.
142
+
143
+ This function differs from :c:func:`xnd_subtree` in that it allows
144
+ mixed indexing and slicing for fixed dimensions. Records and tuples
145
+ cannot be sliced.
146
+
147
+ Variable dimensions can be sliced, but do not support mixed indexing
148
+ and slicing.
@@ -0,0 +1,25 @@
1
+ .. meta::
2
+ :robots: index, follow
3
+ :description: libxnd documentation
4
+ :keywords: libxnd, C, array computing
5
+
6
+ .. sectionauthor:: Stefan Krah <skrah at bytereef.org>
7
+
8
+
9
+ libxnd
10
+ ------
11
+
12
+ libxnd implements support for typed memory blocks using the libndtypes
13
+ type library.
14
+
15
+ Types include ndarrays, ragged arrays (compatible with the Arrow list type),
16
+ optional data (bitmaps are compatible with Arrow), tuples, records (structs),
17
+ strings, bytes and categorical values.
18
+
19
+
20
+ .. toctree::
21
+
22
+ data-structures.rst
23
+ functions.rst
24
+
25
+
@@ -0,0 +1,34 @@
1
+ .. meta::
2
+ :robots: index, follow
3
+ :description: xnd documentation
4
+ :keywords: xnd, C, array computing
5
+
6
+ .. sectionauthor:: Stefan Krah <skrah at bytereef.org>
7
+
8
+
9
+ ========
10
+ Releases
11
+ ========
12
+
13
+
14
+ v0.2.0b2 (February 5th 2018)
15
+ ============================
16
+
17
+ Second release (beta2). This release addresses several build and packaging issues:
18
+
19
+ - Avoid copying libraries into the Python package if system libraries are used.
20
+
21
+ - The build and install partially relied on the dev setup (ndtypes checked out
22
+ in the xnd directory). This dependency has been removed.
23
+
24
+ - The conda build now supports separate library and Python module installs.
25
+
26
+ - Configure now has a ``--without-docs`` option for skipping the doc install.
27
+
28
+
29
+ v0.2.0b1 (January 20th 2018)
30
+ ============================
31
+
32
+ First release (beta1).
33
+
34
+
@@ -0,0 +1,96 @@
1
+ .. meta::
2
+ :robots: index,follow
3
+ :description: xnd container
4
+ :keywords: xnd, alignment, packing
5
+
6
+ .. sectionauthor:: Stefan Krah <skrah at bytereef.org>
7
+
8
+
9
+ Alignment and packing
10
+ =====================
11
+
12
+ The xnd memory allocators support explicit alignment. Alignment is specified
13
+ in the types.
14
+
15
+
16
+ Tuples and records
17
+ ------------------
18
+
19
+ Tuples and records have the *align* and *pack* keywords that have the same
20
+ purpose as gcc's *aligned* and *packed* struct attributes.
21
+
22
+
23
+ Field alignment
24
+ ~~~~~~~~~~~~~~~
25
+
26
+ The *align* keyword can be used to specify an alignment that is greater
27
+ than the natural alignment of a field:
28
+
29
+ .. doctest::
30
+
31
+ >>> from xnd import *
32
+ >>> s = "(uint8, uint64 |align=32|, uint64)"
33
+ >>> x = xnd.empty(s)
34
+ >>> x.align
35
+ 32
36
+ >>> x.type.datasize
37
+ 64
38
+
39
+
40
+
41
+ Field packing
42
+ ~~~~~~~~~~~~~
43
+
44
+ The *pack* keyword can be used to specify an alignment that is smaller
45
+ than the natural alignment of a field:
46
+
47
+ .. doctest::
48
+
49
+ >>> s = "(uint8, uint64 |pack=2|, uint64)"
50
+ >>> x = xnd.empty(s)
51
+ >>> x.align
52
+ 8
53
+ >>> x.type.datasize
54
+ 24
55
+
56
+
57
+
58
+ Struct packing
59
+ ~~~~~~~~~~~~~~
60
+
61
+ The *pack* and *align* keywords can be applied to the entire struct:
62
+
63
+ .. doctest::
64
+
65
+ >>> s = "(uint8, uint64, uint64, pack=1)"
66
+ >>> x = xnd.empty(s)
67
+ >>> x.align
68
+ 1
69
+ >>> x.type.datasize
70
+ 17
71
+
72
+
73
+ Individual field and struct directives are mutually exclusive:
74
+
75
+ .. doctest::
76
+
77
+ >>> s = "2 * (uint8 |align=16|, uint64, pack=1)"
78
+ >>> x = xnd.empty(s)
79
+ Traceback (most recent call last):
80
+ File "<stdin>", line 1, in <module>
81
+ ValueError: cannot have 'pack' tuple attribute and field attributes
82
+
83
+
84
+ Array alignment
85
+ ~~~~~~~~~~~~~~~
86
+
87
+ An array has the same alignment as its elements:
88
+
89
+ .. doctest::
90
+
91
+ >>> s = "2 * (uint8, uint64, pack=1)"
92
+ >>> x = xnd.empty(s)
93
+ >>> x.align
94
+ 1
95
+ >>> x.type.datasize
96
+ 18
@@ -0,0 +1,42 @@
1
+ .. meta::
2
+ :robots: index,follow
3
+ :description: xnd container
4
+ :keywords: xnd, buffer protocol
5
+
6
+ .. sectionauthor:: Stefan Krah <skrah at bytereef.org>
7
+
8
+
9
+ Buffer protocol
10
+ ===============
11
+
12
+ xnd supports importing PEP-3118 buffers.
13
+
14
+
15
+ From NumPy
16
+ ----------
17
+
18
+ Import a simple ndarray:
19
+
20
+ .. doctest::
21
+
22
+ >>> import numpy as np
23
+ >>> from xnd import *
24
+ >>> x = np.array([[[0,1,2], [3,4,5]], [[6,7,8], [9,10,11]]])
25
+ >>> y = xnd.from_buffer(x)
26
+ >>> y.type
27
+ ndt("2 * 2 * 3 * int64")
28
+ >>> y.value
29
+ [[[0, 1, 2], [3, 4, 5]], [[6, 7, 8], [9, 10, 11]]]
30
+
31
+
32
+ Import an ndarray with a struct dtype:
33
+
34
+ .. doctest::
35
+
36
+ >>> x = np.array([(1000, 400.25, 'abc'), (-23, -1e10, 'cba')],
37
+ ... dtype=[('x', '<i4'), ('y', '>f4'), ('z', 'S3')])
38
+ >>> y = xnd.from_buffer(x)
39
+ >>> y.type
40
+ ndt("2 * {x : int32, y : >float32, z : fixed_bytes(size=3)}")
41
+ >>> y.value
42
+ [{'x': 1000, 'y': 400.25, 'z': b'abc'}, {'x': -23, 'y': -10000000000.0, 'z': b'cba'}]
@@ -0,0 +1,30 @@
1
+ .. meta::
2
+ :robots: index, follow
3
+ :description: xnd documentation
4
+ :keywords: memory blocks, unboxed values, array computing, Python
5
+
6
+ .. sectionauthor:: Stefan Krah <skrah at bytereef.org>
7
+
8
+
9
+ xnd
10
+ ---
11
+
12
+ The xnd module implements a container type that maps most Python values
13
+ relevant for scientific computing directly to typed memory.
14
+
15
+ Whenever possible, a single, pointer-free memory block is used.
16
+
17
+ xnd supports ragged arrays, categorical types, indexing, slicing, aligned memory blocks and type inference.
18
+
19
+ Operations like indexing and slicing return zero-copy typed views on the data.
20
+
21
+ Importing PEP-3118 buffers is supported.
22
+
23
+
24
+ .. toctree::
25
+ :maxdepth: 1
26
+
27
+ types.rst
28
+ align-pack.rst
29
+ buffer-protocol.rst
30
+ quickstart.rst
@@ -0,0 +1,62 @@
1
+ .. meta::
2
+ :robots: index,follow
3
+ :description: xnd quickstart
4
+ :keywords: xnd, install
5
+
6
+ .. sectionauthor:: Stefan Krah <skrah at bytereef.org>
7
+
8
+
9
+ Quick Start
10
+ ===========
11
+
12
+ Prerequisites
13
+ ~~~~~~~~~~~~~
14
+
15
+ Python2 is not supported. If not already present, install the Python3
16
+ development packages:
17
+
18
+ .. code-block:: sh
19
+
20
+ # Debian, Ubuntu:
21
+ sudo apt-get install gcc make
22
+ sudo apt-get install python3-dev
23
+
24
+ # Fedora, RedHat:
25
+ sudo yum install gcc make
26
+ sudo yum install python3-devel
27
+
28
+ # openSUSE:
29
+ sudo zypper install gcc make
30
+ sudo zypper install python3-devel
31
+
32
+ # BSD:
33
+ # You know what to do.
34
+
35
+ # Mac OS X:
36
+ # Install Xcode and Python 3 headers.
37
+
38
+
39
+ Install
40
+ ~~~~~~~
41
+
42
+ If `pip <http://pypi.python.org/pypi/pip>`_ is present on the system, installation
43
+ should be as easy as:
44
+
45
+ .. code-block:: sh
46
+
47
+ pip install xnd
48
+
49
+
50
+ Otherwise:
51
+
52
+ .. code-block:: sh
53
+
54
+ tar xvzf xnd-0.2.0b1.tar.gz
55
+ cd xnd-0.2.0b1
56
+ python3 setup.py install
57
+
58
+
59
+ Windows
60
+ ~~~~~~~
61
+
62
+ Refer to the instructions in the *vcbuild* directory in the source distribution.
@@ -0,0 +1,674 @@
1
+ .. meta::
2
+ :robots: index,follow
3
+ :description: xnd container
4
+ :keywords: xnd, types, examples
5
+
6
+ .. sectionauthor:: Stefan Krah <skrah at bytereef.org>
7
+
8
+
9
+ Types
10
+ =====
11
+
12
+ The xnd object is a container that maps a wide range of Python values directly
13
+ to memory. xnd unpacks complex types of arbitrary nesting depth to a single
14
+ memory block.
15
+
16
+ Pointers only occur in explicit pointer types like *Ref* (reference), *Bytes*
17
+ and *String*, but not in the general case.
18
+
19
+
20
+ Type inference
21
+ --------------
22
+
23
+ If no explicit type is given, xnd supports type inference by assuming
24
+ types for the most common Python values.
25
+
26
+
27
+ Fixed arrays
28
+ ~~~~~~~~~~~~
29
+
30
+ .. doctest::
31
+
32
+ >>> from xnd import *
33
+ >>> x = xnd([[0, 1, 2], [3, 4, 5]])
34
+ >>> x
35
+ xnd([[0, 1, 2], [3, 4, 5]], type='2 * 3 * int64')
36
+
37
+
38
+ As expected, lists are mapped to ndarrays and integers to int64. Indexing and
39
+ slicing work the usual way. For performance reasons these operations return
40
+ zero-copy views whenever possible:
41
+
42
+ .. doctest::
43
+
44
+ >>> x[0][1] # Indexing returns views, even for scalars.
45
+ xnd(1, type='int64')
46
+ >>>
47
+ >>> y = x[:, ::-1] # Containers are returned as views.
48
+ >>> y
49
+ xnd([[2, 1, 0], [5, 4, 3]], type='2 * 3 * int64')
50
+
51
+
52
+ Subarrays are views and properly typed:
53
+
54
+ .. doctest::
55
+
56
+ >>> x[1]
57
+ xnd([3, 4, 5], type='3 * int64')
58
+
59
+
60
+ The representation of large values is abbreviated:
61
+
62
+ .. doctest::
63
+
64
+ >>> x = xnd(10 * [200 * [1]])
65
+ >>> x
66
+ xnd([[1, 1, 1, 1, 1, 1, 1, 1, 1, ...],
67
+ [1, 1, 1, 1, 1, 1, 1, 1, 1, ...],
68
+ [1, 1, 1, 1, 1, 1, 1, 1, 1, ...],
69
+ [1, 1, 1, 1, 1, 1, 1, 1, 1, ...],
70
+ [1, 1, 1, 1, 1, 1, 1, 1, 1, ...],
71
+ [1, 1, 1, 1, 1, 1, 1, 1, 1, ...],
72
+ [1, 1, 1, 1, 1, 1, 1, 1, 1, ...],
73
+ [1, 1, 1, 1, 1, 1, 1, 1, 1, ...],
74
+ [1, 1, 1, 1, 1, 1, 1, 1, 1, ...],
75
+ ...],
76
+ type='10 * 200 * int64')
77
+
78
+
79
+ Values can be accessed in full using the *value* property:
80
+
81
+ .. doctest::
82
+
83
+ >>> x = xnd(11 * [1])
84
+ >>> x
85
+ xnd([1, 1, 1, 1, 1, 1, 1, 1, 1, ...], type='11 * int64')
86
+ >>> x.value
87
+ [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
88
+
89
+
90
+ Types can be accessed using the *type* property:
91
+
92
+ .. doctest::
93
+
94
+ >>> x = xnd(11 * [1])
95
+ >>> x.type
96
+ ndt("11 * int64")
97
+
98
+
99
+ Ragged arrays
100
+ ~~~~~~~~~~~~~
101
+
102
+ Ragged arrays are compatible with the Arrow list representation. The data
103
+ is pointer-free; elements are addressed through one offset array
104
+ per dimension.
105
+
106
+ .. doctest::
107
+
108
+ >>> xnd([[0.1j], [3+2j, 4+5j, 10j]])
109
+ xnd([[0.1j], [(3+2j), (4+5j), 10j]], type='var * var * complex128')
110
+
111
+
112
+ Indexing and slicing work as usual, returning properly typed views or
113
+ values in the case of scalars:
114
+
115
+ .. doctest::
116
+
117
+ >>> x = xnd([[0.1j], [3+2j, 4+5j, 10j]])
118
+ >>> x[1, 2]
119
+ xnd(10j, type='complex128')
120
+
121
+ >>> x[1]
122
+ xnd([(3+2j), (4+5j), 10j], type='var * complex128')
123
+
124
+
125
+ Eliminating dimensions through mixed slicing and indexing is not supported
126
+ because it would require copying and adjusting potentially huge offset arrays:
127
+
128
+ .. doctest::
129
+
130
+ >>> y = x[:, 1]
131
+ Traceback (most recent call last):
132
+ File "<stdin>", line 1, in <module>
133
+ IndexError: mixed indexing and slicing is not supported for var dimensions
134
+
135
+
136
+ Records (structs)
137
+ ~~~~~~~~~~~~~~~~~
138
+
139
+ From Python 3.6 on, dicts retain their order, so they can be used directly
140
+ for initializing C structs.
141
+
142
+ .. doctest::
143
+
144
+ >>> xnd({'a': 'foo', 'b': 10.2})
145
+ xnd({'a': 'foo', 'b': 10.2}, type='{a : string, b : float64}')
146
+
147
+
148
+ Tuples
149
+ ~~~~~~
150
+
151
+ Python tuples are directly translated to the libndtypes tuple type:
152
+
153
+ .. doctest::
154
+
155
+ >>> xnd(('foo', b'bar', [None, 10.0, 20.0]))
156
+ xnd(('foo', b'bar', [None, 10.0, 20.0]), type='(string, bytes, 3 * ?float64)')
157
+
158
+
159
+ Nested arrays in structs
160
+ ~~~~~~~~~~~~~~~~~~~~~~~~
161
+
162
+ xnd seamlessly supports nested values of arbitrary depth:
163
+
164
+ .. doctest::
165
+
166
+ >>> lst = [{'name': 'John', 'internet_points': [1, 2, 3]},
167
+ ... {'name': 'Jane', 'internet_points': [4, 5, 6]}]
168
+ >>> xnd(lst)
169
+ xnd([{'name': 'John', 'internet_points': [1, 2, 3]}, {'name': 'Jane', 'internet_points': [4, 5, 6]}],
170
+ type='2 * {name : string, internet_points : 3 * int64}')
171
+
172
+
173
+ Optional data (missing values)
174
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
175
+
176
+ Optional data is currently specified using *None*. It is under debate if
177
+ a separate *NA* singleton object would be more suitable.
178
+
179
+ .. doctest::
180
+
181
+ >>> lst = [0, 1, None, 2, 3, None, 5, 10]
182
+ >>> xnd(lst)
183
+ xnd([0, 1, None, 2, 3, None, 5, 10], type='8 * ?int64')
184
+
185
+
186
+ Categorical data
187
+ ~~~~~~~~~~~~~~~~
188
+
189
+ Type inference would be ambiguous, so it cannot work directly. xnd supports
190
+ the *levels* argument that is internally translated to the type.
191
+
192
+ .. doctest::
193
+
194
+ >>> levels = ['January', 'August', 'December', None]
195
+ >>> x = xnd(['January', 'January', None, 'December', 'August', 'December', 'December'], levels=levels)
196
+ >>> x.value
197
+ ['January', 'January', None, 'December', 'August', 'December', 'December']
198
+ >>> x.type
199
+ ndt("7 * categorical('January', 'August', 'December', NA)")
200
+
201
+
202
+ The above is equivalent to specifying the type directly:
203
+
204
+ .. doctest::
205
+
206
+ >>> from ndtypes import *
207
+ >>> t = ndt("7 * categorical('January', 'August', 'December', NA)")
208
+ >>> x = xnd(['January', 'January', None, 'December', 'August', 'December', 'December'], type=t)
209
+ >>> x.value
210
+ ['January', 'January', None, 'December', 'August', 'December', 'December']
211
+ >>> x.type
212
+ ndt("7 * categorical('January', 'August', 'December', NA)")
213
+
214
+
215
+ Explicit types
216
+ --------------
217
+
218
+ While type inference is well-defined, it necessarily makes assumptions about
219
+ the programmer's intent.
220
+
221
+ There are two cases where types should be given:
222
+
223
+
224
+ Different types are intended
225
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
226
+
227
+ .. doctest::
228
+
229
+ >>> xnd([[0,1,2], [3,4,5]], type="2 * 3 * uint8")
230
+ xnd([[0, 1, 2], [3, 4, 5]], type='2 * 3 * uint8')
231
+
232
+ Here, type inference would deduce :c:macro:`int64`, so :c:macro:`uint8` needs
233
+ to be passed explicitly.
234
+
235
+
236
+ Performance
237
+ ~~~~~~~~~~~
238
+
239
+ For large arrays, explicit types are significantly faster. Type inference
240
+ supports arbitrary nesting depth, is complex and still implemented in pure
241
+ Python. Compare:
242
+
243
+ .. doctest::
244
+
245
+ >>> lst = [1] * 1000000
246
+ >>> x = xnd(lst) # inference
247
+ >>>
248
+ >>> x = xnd(lst, type='1000000 * int64') # explicit
249
+
250
+
251
+ All supported types
252
+ -------------------
253
+
254
+ Fixed arrays
255
+ ~~~~~~~~~~~~
256
+
257
+ Fixed arrays are similar to NumPy's ndarray. One difference is that internally
258
+ xnd uses steps instead of strides. One step is the number of indices required
259
+ to move the linear index from one dimension element to the next.
260
+
261
+ This facilitates optional data, whose bitmaps need to be addressed by the
262
+ linear index. The equation *stride = step * itemsize* always holds.
263
+
264
+
265
+ .. doctest::
266
+
267
+ >>> xnd([[[1,2], [None, 3]], [[4, None], [5, 6]]])
268
+ xnd([[[1, 2], [None, 3]], [[4, None], [5, 6]]], type='2 * 2 * 2 * ?int64')
269
+
270
+ This is a fixed array with optional data.
271
+
272
+
273
+ .. doctest::
274
+
275
+ >>> xnd([(1,2.0,3j), (4,5.0,6j)])
276
+ xnd([(1, 2.0, 3j), (4, 5.0, 6j)], type='2 * (int64, float64, complex128)')
277
+
278
+ An array with tuple elements.
279
+
280
+
281
+ Fortran order
282
+ ~~~~~~~~~~~~~
283
+
284
+ Fortran order is specified by prefixing the dimensions with an exclamation mark:
285
+
286
+ .. doctest::
287
+
288
+ >>> lst = [[1, 2, 3], [4, 5, 6]]
289
+ >>> x = xnd(lst, type='!2 * 3 * uint16')
290
+ >>>
291
+ >>> x.type.shape
292
+ (2, 3)
293
+ >>> x.type.strides
294
+ (2, 4)
295
+
296
+
297
+ Alternatively, steps can be passed as arguments to the fixed dimension type:
298
+
299
+ .. doctest::
300
+
301
+ >>> from ndtypes import *
302
+ >>> lst = [[1, 2, 3], [4, 5, 6]]
303
+ >>> t = ndt("fixed(shape=2, step=1) * fixed(shape=3, step=2) * uint16")
304
+ >>> x = xnd(lst, type=t)
305
+ >>> x.type.shape
306
+ (2, 3)
307
+ >>> x.type.strides
308
+ (2, 4)
309
+
310
+
311
+ Ragged arrays
312
+ ~~~~~~~~~~~~~
313
+
314
+ Ragged arrays with explicit types are easiest to construct using the *dtype*
315
+ argument to the xnd constructor.
316
+
317
+ .. doctest::
318
+
319
+ >>> lst = [[0], [1, 2], [3, 4, 5]]
320
+ >>> xnd(lst, dtype="int32")
321
+ xnd([[0], [1, 2], [3, 4, 5]], type='var * var * int32')
322
+
323
+
324
+ Alternatively, offsets can be passed as arguments to the var dimension type:
325
+
326
+ .. doctest::
327
+
328
+ >>> from ndtypes import ndt
329
+ >>> t = ndt("var(offsets=[0,3]) * var(offsets=[0,1,3,6]) * int32")
330
+ >>> xnd(lst, type=t)
331
+ xnd([[0], [1, 2], [3, 4, 5]], type='var * var * int32')
332
+
333
+
334
+ Tuples
335
+ ~~~~~~
336
+
337
+ In memory, tuples are the same as C structs.
338
+
339
+ .. doctest::
340
+
341
+ >>> xnd(("foo", 1.0))
342
+ xnd(('foo', 1.0), type='(string, float64)')
343
+
344
+
345
+ Indexing works the same as for arrays:
346
+
347
+ .. doctest::
348
+
349
+ >>> x = xnd(("foo", 1.0))
350
+ >>> x[0]
351
+ xnd('foo', type='string')
352
+
353
+
354
+ Nested tuples are more general than ragged arrays. They can a) hold different
355
+ data types and b) represent unbalanced trees.
356
+
357
+ They do not allow slicing though and are probably less efficient.
358
+
359
+ This is an example of an unbalanced tree that cannot be represented as a
360
+ ragged array:
361
+
362
+ .. doctest::
363
+
364
+ >>> unbalanced_tree = (((1.0, 2.0), (3.0)), 4.0, ((5.0, 6.0, 7.0), ()))
365
+ >>> x = xnd(unbalanced_tree)
366
+ >>> x.value
367
+ (((1.0, 2.0), 3.0), 4.0, ((5.0, 6.0, 7.0), ()))
368
+ >>> x.type
369
+ ndt("(((float64, float64), float64), float64, ((float64, float64, float64), ()))")
370
+ >>>
371
+ >>> x[0]
372
+ xnd(((1.0, 2.0), 3.0), type='((float64, float64), float64)')
373
+ >>> x[0][0]
374
+ xnd((1.0, 2.0), type='(float64, float64)')
375
+
376
+
377
+ Note that the data in the above tree example is packed into a single contiguous
378
+ memory block.
379
+
380
+
381
+ Records
382
+ ~~~~~~~
383
+
384
+ In memory, records are C structs. The field names are only stored in the type.
385
+
386
+ The following examples use Python-3.6, which keeps the dict initialization
387
+ order.
388
+
389
+ .. doctest::
390
+
391
+ >>> x = xnd({'a': b'123', 'b': {'x': 1.2, 'y': 100+3j}})
392
+ >>> x.value
393
+ {'a': b'123', 'b': {'x': 1.2, 'y': (100+3j)}}
394
+ >>> x.type
395
+ ndt("{a : bytes, b : {x : float64, y : complex128}}")
396
+
397
+
398
+ Indexing works the same as for arrays. Additionally, fields can be indexed
399
+ by name:
400
+
401
+ .. doctest::
402
+
403
+ >>> x[0]
404
+ xnd(b'123', type='bytes')
405
+ >>> x['a']
406
+ xnd(b'123', type='bytes')
407
+ >>> x['b']
408
+ xnd({'x': 1.2, 'y': (100+3j)}, type='{x : float64, y : complex128}')
409
+
410
+
411
+ The nesting depth is arbitrary. In the following example, the data -- except
412
+ for strings, which are pointers -- is packed into a single contiguous memory
413
+ block:
414
+
415
+ .. code-block:: py
416
+
417
+ >>> from pprint import pprint
418
+ >>> item = {
419
+ ... "id": 1001,
420
+ ... "name": "cyclotron",
421
+ ... "price": 5998321.99,
422
+ ... "tags": ["connoisseur", "luxury"],
423
+ ... "stock": {
424
+ ... "warehouse": 722,
425
+ ... "retail": 20
426
+ ... }
427
+ ... }
428
+ >>> x = xnd(item)
429
+ >>>
430
+ >>> pprint(x.value)
431
+ {'id': 1001,
432
+ 'name': 'cyclotron',
433
+ 'price': 5998321.99,
434
+ 'stock': {'retail': 20, 'warehouse': 722},
435
+ 'tags': ['connoisseur', 'luxury']}
436
+ >>>
437
+ >>> x.type.pprint()
438
+ {
439
+ id : int64,
440
+ name : string,
441
+ price : float64,
442
+ tags : 2 * string,
443
+ stock : {
444
+ warehouse : int64,
445
+ retail : int64
446
+ }
447
+ }
448
+
449
+
450
+ Strings can be embedded into the array by specifying the fixed string type.
451
+ In this case, the memory block is pointer-free.
452
+
453
+ .. code-block:: py
454
+
455
+ >>> from ndtypes import ndt
456
+ >>>
457
+ >>> t = """
458
+ ... { id : int64,
459
+ ... name : fixed_string(30),
460
+ ... price : float64,
461
+ ... tags : 2 * fixed_string(30),
462
+ ... stock : {warehouse : int64, retail : int64}
463
+ ... }
464
+ ... """
465
+ >>>
466
+ >>> x = xnd(item, type=t)
467
+ >>> x.type.pprint()
468
+ {
469
+ id : int64,
470
+ name : fixed_string(30),
471
+ price : float64,
472
+ tags : 2 * fixed_string(30),
473
+ stock : {
474
+ warehouse : int64,
475
+ retail : int64
476
+ }
477
+ }
478
+
479
+
480
+ Record of arrays
481
+ ~~~~~~~~~~~~~~~~
482
+
483
+ Often it is more memory efficient to store an array of records as a record of
484
+ arrays. This example with columnar data is from the Arrow homepage:
485
+
486
+ .. doctest::
487
+
488
+ >>> data = {'session_id': [1331247700, 1331247702, 1331247709, 1331247799],
489
+ ... 'timestamp': [1515529735.4895875, 1515529746.2128427, 1515529756.4485607, 1515529766.2181058],
490
+ ... 'source_ip': ['8.8.8.100', '100.2.0.11', '99.101.22.222', '12.100.111.200']}
491
+ >>> x = xnd(data)
492
+ >>> x.type
493
+ ndt("{session_id : 4 * int64, timestamp : 4 * float64, source_ip : 4 * string}")
494
+
495
+
496
+
497
+ References
498
+ ~~~~~~~~~~
499
+
500
+ References are transparent pointers to new memory blocks (meaning a new
501
+ data pointer, not a whole new xnd buffer).
502
+
503
+ For example, this is an array of pointers to arrays:
504
+
505
+ .. doctest::
506
+
507
+ >>> t = ndt("3 * ref(4 * uint64)")
508
+ >>> lst = [[0,1,2,3], [4,5,6,7], [8,9,10,11]]
509
+ >>> xnd(lst, type=t)
510
+ xnd([[0, 1, 2, 3], [4, 5, 6, 7], [8, 9, 10, 11]], type='3 * ref(4 * uint64)')
511
+
512
+ The user sees no difference to a regular 3 by 4 array, but internally
513
+ the outer dimension consists of three pointers to the inner arrays.
514
+
515
+ For memory blocks generated by xnd itself the feature is not so useful --
516
+ after all, it is usually better to have a single memory block than one
517
+ with additional pointers.
518
+
519
+
520
+ However, suppose that in the above columnar data example another application
521
+ represents the arrays inside the record with pointers. Using the *ref* type,
522
+ data structures borrowed from such an application can be properly typed:
523
+
524
+ .. doctest::
525
+
526
+ >>> t = ndt("{session_id : &4 * int64, timestamp : &4 * float64, source_ip : &4 * string}")
527
+ >>> x = xnd(data, type=t)
528
+ >>> x.type
529
+ ndt("{session_id : ref(4 * int64), timestamp : ref(4 * float64), source_ip : ref(4 * string)}")
530
+
531
+ The ampersand is the shorthand for "ref".
532
+
533
+
534
+
535
+ Constructors
536
+ ~~~~~~~~~~~~
537
+
538
+ Constructors are xnd's way of creating distinct named types. The constructor
539
+ argument is a regular type.
540
+
541
+ Constructors open up a new dtype, so named arrays can be the dtype of
542
+ other arrays. Type inference currently isn't aware of constructors,
543
+ so types have to be provided.
544
+
545
+ .. doctest::
546
+
547
+ >>> t = ndt("3 * SomeMatrix(2 * 2 * float32)")
548
+ >>> lst = [[[1,2], [3,4]], [[5,6], [7,8]], [[9,10], [11,12]]]
549
+ >>> x = xnd(lst, type=t)
550
+ >>> x
551
+ xnd([[[1.0, 2.0], [3.0, 4.0]], [[5.0, 6.0], [7.0, 8.0]], [[9.0, 10.0], [11.0, 12.0]]],
552
+ type='3 * SomeMatrix(2 * 2 * float32)')
553
+ >>> x[0]
554
+ xnd([[1.0, 2.0], [3.0, 4.0]], type='SomeMatrix(2 * 2 * float32)')
555
+
556
+
557
+ Categorical
558
+ ~~~~~~~~~~~
559
+
560
+ Categorical types contain values. The data stored in xnd buffers are indices
561
+ (:c:macro:`int64`) into the type's categories.
562
+
563
+ .. doctest::
564
+
565
+ >>> t = ndt("categorical('a', 'b', 'c', NA)")
566
+ >>> data = ['a', 'a', 'b', 'a', 'a', 'a', 'foo', 'c']
567
+ >>> x = xnd(data, dtype=t)
568
+ >>> x.value
569
+ ['a', 'a', 'b', 'a', 'a', 'a', None, 'c']
570
+
571
+
572
+ Fixed String
573
+ ~~~~~~~~~~~~
574
+
575
+ Fixed strings are embedded into arrays. Supported encodings are 'ascii',
576
+ 'utf8', 'utf16' and 'utf32'. The string size argument denotes the number
577
+ of code points rather than bytes.
578
+
579
+ .. doctest::
580
+
581
+ >>> t = ndt("10 * fixed_string(3, 'utf32')")
582
+ >>> x = xnd.empty(t)
583
+ >>> x.value
584
+ ['', '', '', '', '', '', '', '', '', '']
585
+ >>> x[3] = "\U000003B1\U000003B2\U000003B3"
586
+ >>> x.value
587
+ ['', '', '', 'αβγ', '', '', '', '', '', '']
588
+
589
+
590
+ Fixed Bytes
591
+ ~~~~~~~~~~~
592
+
593
+ Fixed bytes are embedded into arrays.
594
+
595
+ .. doctest::
596
+
597
+ >>> t = ndt("3 * fixed_bytes(size=3)")
598
+ >>> x = xnd.empty(t)
599
+ >>> x[2] = b'123'
600
+ >>> x.value
601
+ [b'\x00\x00\x00', b'\x00\x00\x00', b'123']
602
+ >>> x.align
603
+ 1
604
+
605
+ Alignment can be requested with the requirement that size is a multiple of
606
+ alignment:
607
+
608
+ .. doctest::
609
+
610
+ >>> t = ndt("3 * fixed_bytes(size=32, align=16)")
611
+ >>> x = xnd.empty(t)
612
+ >>> x.align
613
+ 16
614
+
615
+
616
+ String
617
+ ~~~~~~
618
+
619
+ Strings are pointers to :c:macro:`NUL`-terminated UTF-8 strings.
620
+
621
+ .. doctest::
622
+
623
+ >>> x = xnd.empty("10 * string")
624
+ >>> x.value
625
+ ['', '', '', '', '', '', '', '', '', '']
626
+ >>> x[0] = "abc"
627
+ >>> x.value
628
+ ['abc', '', '', '', '', '', '', '', '', '']
629
+
630
+
631
+
632
+ Bytes
633
+ ~~~~~
634
+
635
+ Internally, bytes are structs with a size field and a pointer to the data.
636
+
637
+ .. doctest::
638
+
639
+ >>> xnd([b'123', b'45678'])
640
+ xnd([b'123', b'45678'], type='2 * bytes')
641
+
642
+
643
+ The bytes constructor takes an optional *align* argument that specifies the
644
+ alignment of the allocated data:
645
+
646
+ .. doctest::
647
+
648
+ >>> x = xnd([b'abc', b'123'], type="2 * bytes(align=64)")
649
+ >>> x.value
650
+ [b'abc', b'123']
651
+ >>> x.align
652
+ 8
653
+
654
+ Note that *x.align* is the alignment of the array. The bytes data that the
655
+ embedded pointers refer to is aligned at *64*.
656
+
657
+
658
+ Primitive types
659
+ ~~~~~~~~~~~~~~~
660
+
661
+ As a short example, here is a tuple that contains all primitive types:
662
+
663
+ .. doctest::
664
+
665
+ >>> s = """
666
+ ... (bool,
667
+ ... int8, int16, int32, int64,
668
+ ... uint8, uint16, uint32, uint64,
669
+ ... float16, float32, float64,
670
+ ... complex32, complex64, complex128)
671
+ ... """
672
+ >>> x = xnd.empty(s)
673
+ >>> x.value
674
+ (False, 0, 0, 0, 0, 0, 0, 0, 0, 0.0, 0.0, 0.0, 0j, 0j, 0j)