xnd 0.2.0dev3

Sign up to get free protection for your applications and to get access to all the features.
Files changed (93) hide show
  1. checksums.yaml +7 -0
  2. data/CONTRIBUTING.md +42 -0
  3. data/Gemfile +3 -0
  4. data/History.md +0 -0
  5. data/README.md +7 -0
  6. data/Rakefile +135 -0
  7. data/ext/ruby_xnd/extconf.rb +70 -0
  8. data/ext/ruby_xnd/float_pack_unpack.c +277 -0
  9. data/ext/ruby_xnd/float_pack_unpack.h +39 -0
  10. data/ext/ruby_xnd/gc_guard.c +36 -0
  11. data/ext/ruby_xnd/gc_guard.h +12 -0
  12. data/ext/ruby_xnd/include/xnd.h +449 -0
  13. data/ext/ruby_xnd/lib/libxnd.a +0 -0
  14. data/ext/ruby_xnd/lib/libxnd.so +1 -0
  15. data/ext/ruby_xnd/lib/libxnd.so.0 +1 -0
  16. data/ext/ruby_xnd/lib/libxnd.so.0.2.0dev3 +0 -0
  17. data/ext/ruby_xnd/memory_block_object.c +32 -0
  18. data/ext/ruby_xnd/memory_block_object.h +33 -0
  19. data/ext/ruby_xnd/ruby_xnd.c +1953 -0
  20. data/ext/ruby_xnd/ruby_xnd.h +61 -0
  21. data/ext/ruby_xnd/ruby_xnd_internal.h +85 -0
  22. data/ext/ruby_xnd/util.h +170 -0
  23. data/ext/ruby_xnd/xnd/AUTHORS.txt +5 -0
  24. data/ext/ruby_xnd/xnd/INSTALL.txt +134 -0
  25. data/ext/ruby_xnd/xnd/LICENSE.txt +29 -0
  26. data/ext/ruby_xnd/xnd/MANIFEST.in +3 -0
  27. data/ext/ruby_xnd/xnd/Makefile.in +80 -0
  28. data/ext/ruby_xnd/xnd/README.rst +44 -0
  29. data/ext/ruby_xnd/xnd/config.guess +1530 -0
  30. data/ext/ruby_xnd/xnd/config.h.in +22 -0
  31. data/ext/ruby_xnd/xnd/config.sub +1782 -0
  32. data/ext/ruby_xnd/xnd/configure +4867 -0
  33. data/ext/ruby_xnd/xnd/configure.ac +164 -0
  34. data/ext/ruby_xnd/xnd/doc/Makefile +14 -0
  35. data/ext/ruby_xnd/xnd/doc/_static/copybutton.js +66 -0
  36. data/ext/ruby_xnd/xnd/doc/conf.py +26 -0
  37. data/ext/ruby_xnd/xnd/doc/index.rst +44 -0
  38. data/ext/ruby_xnd/xnd/doc/libxnd/data-structures.rst +186 -0
  39. data/ext/ruby_xnd/xnd/doc/libxnd/functions.rst +148 -0
  40. data/ext/ruby_xnd/xnd/doc/libxnd/index.rst +25 -0
  41. data/ext/ruby_xnd/xnd/doc/releases/index.rst +34 -0
  42. data/ext/ruby_xnd/xnd/doc/xnd/align-pack.rst +96 -0
  43. data/ext/ruby_xnd/xnd/doc/xnd/buffer-protocol.rst +42 -0
  44. data/ext/ruby_xnd/xnd/doc/xnd/index.rst +30 -0
  45. data/ext/ruby_xnd/xnd/doc/xnd/quickstart.rst +62 -0
  46. data/ext/ruby_xnd/xnd/doc/xnd/types.rst +674 -0
  47. data/ext/ruby_xnd/xnd/install-sh +527 -0
  48. data/ext/ruby_xnd/xnd/libxnd/Makefile.in +102 -0
  49. data/ext/ruby_xnd/xnd/libxnd/Makefile.vc +112 -0
  50. data/ext/ruby_xnd/xnd/libxnd/bitmaps.c +345 -0
  51. data/ext/ruby_xnd/xnd/libxnd/contrib.h +313 -0
  52. data/ext/ruby_xnd/xnd/libxnd/copy.c +944 -0
  53. data/ext/ruby_xnd/xnd/libxnd/equal.c +1216 -0
  54. data/ext/ruby_xnd/xnd/libxnd/inline.h +154 -0
  55. data/ext/ruby_xnd/xnd/libxnd/overflow.h +147 -0
  56. data/ext/ruby_xnd/xnd/libxnd/split.c +286 -0
  57. data/ext/ruby_xnd/xnd/libxnd/tests/Makefile.in +39 -0
  58. data/ext/ruby_xnd/xnd/libxnd/tests/Makefile.vc +44 -0
  59. data/ext/ruby_xnd/xnd/libxnd/tests/README.txt +2 -0
  60. data/ext/ruby_xnd/xnd/libxnd/tests/runtest.c +101 -0
  61. data/ext/ruby_xnd/xnd/libxnd/tests/test.h +48 -0
  62. data/ext/ruby_xnd/xnd/libxnd/tests/test_fixed.c +108 -0
  63. data/ext/ruby_xnd/xnd/libxnd/xnd.c +1304 -0
  64. data/ext/ruby_xnd/xnd/libxnd/xnd.h +449 -0
  65. data/ext/ruby_xnd/xnd/python/test_xnd.py +3144 -0
  66. data/ext/ruby_xnd/xnd/python/xnd/__init__.py +290 -0
  67. data/ext/ruby_xnd/xnd/python/xnd/_xnd.c +2822 -0
  68. data/ext/ruby_xnd/xnd/python/xnd/contrib/pretty.py +850 -0
  69. data/ext/ruby_xnd/xnd/python/xnd/docstrings.h +129 -0
  70. data/ext/ruby_xnd/xnd/python/xnd/pyxnd.h +200 -0
  71. data/ext/ruby_xnd/xnd/python/xnd/util.h +182 -0
  72. data/ext/ruby_xnd/xnd/python/xnd_randvalue.py +1121 -0
  73. data/ext/ruby_xnd/xnd/python/xnd_support.py +106 -0
  74. data/ext/ruby_xnd/xnd/setup.py +303 -0
  75. data/ext/ruby_xnd/xnd/vcbuild/INSTALL.txt +42 -0
  76. data/ext/ruby_xnd/xnd/vcbuild/runtest32.bat +16 -0
  77. data/ext/ruby_xnd/xnd/vcbuild/runtest64.bat +14 -0
  78. data/ext/ruby_xnd/xnd/vcbuild/vcbuild32.bat +29 -0
  79. data/ext/ruby_xnd/xnd/vcbuild/vcbuild64.bat +29 -0
  80. data/ext/ruby_xnd/xnd/vcbuild/vcclean.bat +13 -0
  81. data/ext/ruby_xnd/xnd/vcbuild/vcdistclean.bat +14 -0
  82. data/lib/ruby_xnd.so +0 -0
  83. data/lib/xnd.rb +306 -0
  84. data/lib/xnd/monkeys.rb +29 -0
  85. data/lib/xnd/version.rb +6 -0
  86. data/spec/debug_spec.rb +9 -0
  87. data/spec/gc_guard_spec.rb +10 -0
  88. data/spec/leakcheck.rb +9 -0
  89. data/spec/spec_helper.rb +877 -0
  90. data/spec/type_inference_spec.rb +81 -0
  91. data/spec/xnd_spec.rb +2921 -0
  92. data/xnd.gemspec +47 -0
  93. metadata +215 -0
@@ -0,0 +1,148 @@
1
+ .. meta::
2
+ :robots: index,follow
3
+ :description: libxnd documentation
4
+
5
+ .. sectionauthor:: Stefan Krah <skrah at bytereef.org>
6
+
7
+
8
+ Functions
9
+ =========
10
+
11
+ Create typed memory blocks
12
+ --------------------------
13
+
14
+ The main use case for libxnd is to create and manage typed memory blocks.
15
+ These blocks are fully initialized to *0*. References to additional memory
16
+ blocks are allocated and initialized recursively.
17
+
18
+ *bytes* and *string* types are initialized to :c:macro:`NULL`, since their
19
+ actual length is not known yet.
20
+
21
+
22
+ .. topic:: xnd_empty_from_string
23
+
24
+ .. code-block:: c
25
+
26
+ xnd_master_t *xnd_empty_from_string(const char *s, uint32_t flags, ndt_context_t *ctx);
27
+
28
+ Return a new master buffer according to the type string in *s*. *flags*
29
+ must include :c:macro:`XND_OWN_TYPE`.
30
+
31
+
32
+ .. topic:: xnd_empty_from_type
33
+
34
+ .. code-block:: c
35
+
36
+ xnd_master_t *xnd_empty_from_type(const ndt_t *t, uint32_t flags, ndt_context_t *ctx);
37
+
38
+
39
+ Return a new master buffer according to the type *t*. *flags* must not include
40
+ :c:macro:`XND_OWN_TYPE`, i.e. the type is externally managed.
41
+
42
+ This is the case in the Python bindings, where the ndtypes module creates
43
+ and manages types.
44
+
45
+
46
+ Delete typed memory blocks
47
+ --------------------------
48
+
49
+ .. topic:: xnd_del
50
+
51
+ .. code-block:: c
52
+
53
+ void xnd_del(xnd_master_t *x);
54
+
55
+ Delete the master buffer according to its flags. *x* may be :c:macro:`NULL`.
56
+ *x->master.ptr* and *x->master.type* may be :c:macro:`NULL`.
57
+
58
+ The latter situation should only arise when breaking up reference cycles.
59
+ This is used in the Python module.
60
+
61
+
62
+ Bitmaps
63
+ -------
64
+
65
+ .. topic:: xnd_bitmap_next
66
+
67
+ .. code-block:: c
68
+
69
+ xnd_bitmap_t xnd_bitmap_next(const xnd_t *x, int64_t i, ndt_context_t *ctx);
70
+
71
+ Get the next bitmap for the *Tuple*, *Record*, *Ref* and *Constr* types.
72
+
73
+ This is a convenience function that checks if the types have optional
74
+ subtrees.
75
+
76
+ If yes, return the bitmap at index *i*. If not, it returns an empty bitmap
77
+ that must not be accessed.
78
+
79
+
80
+ .. topic:: xnd_set_valid
81
+
82
+ .. code-block:: c
83
+
84
+ void xnd_set_valid(xnd_t *x);
85
+
86
+ Set the validity bit at *x->index*. *x* must have an optional type.
87
+
88
+
89
+ .. topic:: xnd_set_na
90
+
91
+ .. code-block:: c
92
+
93
+ void xnd_set_na(xnd_t *x);
94
+
95
+ Clear the validity bit at *x->index*. *x* must have an optional type.
96
+
97
+
98
+ .. topic:: xnd_is_valid
99
+
100
+ .. code-block:: c
101
+
102
+ int xnd_is_valid(const xnd_t *x);
103
+
104
+ Check if the element at *x->index* is valid. If *x* does not have an optional
105
+ type, return *1*. Otherwise, return the validity bit (zero or nonzero).
106
+
107
+
108
+ .. topic:: xnd_is_na
109
+
110
+ .. code-block:: c
111
+
112
+ int xnd_is_na(const xnd_t *x);
113
+
114
+ Check if the element at *x->index* is missing (NA). If *x* does not have an optional
115
+ type, return *0*. Otherwise, return the negation of the validity bit.
116
+
117
+
118
+ .. topic:: xnd_subtree
119
+
120
+ .. code-block:: c
121
+
122
+ xnd_t xnd_subtree(const xnd_t *x, const xnd_index_t indices[], int len,
123
+ ndt_context_t *ctx);
124
+
125
+ Apply zero or more indices to the input *x* and return a typed view. Valid
126
+ indices are integers or strings for record fields.
127
+
128
+ This function is more general than pure array indexing, hence the name. For
129
+ example, it is possible to index into nested records that in turn contain
130
+ arrays.
131
+
132
+
133
+ .. topic:: xnd_multikey
134
+
135
+ .. code-block:: c
136
+
137
+ xnd_t xnd_multikey(const xnd_t *x, const xnd_index_t indices[], int len,
138
+ ndt_context_t *ctx);
139
+
140
+ Apply zero or more keys to the input *x* and return a typed view. Valid
141
+ keys are integers or slices.
142
+
143
+ This function differs from :c:func:`xnd_subtree` in that it allows
144
+ mixed indexing and slicing for fixed dimensions. Records and tuples
145
+ cannot be sliced.
146
+
147
+ Variable dimensions can be sliced, but do not support mixed indexing
148
+ and slicing.
@@ -0,0 +1,25 @@
1
+ .. meta::
2
+ :robots: index, follow
3
+ :description: libxnd documentation
4
+ :keywords: libxnd, C, array computing
5
+
6
+ .. sectionauthor:: Stefan Krah <skrah at bytereef.org>
7
+
8
+
9
+ libxnd
10
+ ------
11
+
12
+ libxnd implements support for typed memory blocks using the libndtypes
13
+ type library.
14
+
15
+ Types include ndarrays, ragged arrays (compatible with the Arrow list type),
16
+ optional data (bitmaps are compatible with Arrow), tuples, records (structs),
17
+ strings, bytes and categorical values.
18
+
19
+
20
+ .. toctree::
21
+
22
+ data-structures.rst
23
+ functions.rst
24
+
25
+
@@ -0,0 +1,34 @@
1
+ .. meta::
2
+ :robots: index, follow
3
+ :description: xnd documentation
4
+ :keywords: xnd, libxnd, C, array computing
5
+
6
+ .. sectionauthor:: Stefan Krah <skrah at bytereef.org>
7
+
8
+
9
+ ========
10
+ Releases
11
+ ========
12
+
13
+
14
+ v0.2.0b2 (February 5th 2018)
15
+ ============================
16
+
17
+ Second release (beta2). This release addresses several build and packaging issues:
18
+
19
+ - Avoid copying libraries into the Python package if system libraries are used.
20
+
21
+ - The build and install partially relied on the dev setup (ndtypes checked out
22
+ in the xnd directory). This dependency has been removed.
23
+
24
+ - The conda build now supports separate library and Python module installs.
25
+
26
+ - Configure now has a ``--without-docs`` option for skipping the doc install.
27
+
28
+
29
+ v0.2.0b1 (January 20th 2018)
30
+ ============================
31
+
32
+ First release (beta1).
33
+
34
+
@@ -0,0 +1,96 @@
1
+ .. meta::
2
+ :robots: index,follow
3
+ :description: xnd container
4
+ :keywords: xnd, alignment, packing
5
+
6
+ .. sectionauthor:: Stefan Krah <skrah at bytereef.org>
7
+
8
+
9
+ Alignment and packing
10
+ =====================
11
+
12
+ The xnd memory allocators support explicit alignment. Alignment is specified
13
+ in the types.
14
+
15
+
16
+ Tuples and records
17
+ ------------------
18
+
19
+ Tuples and records have the *align* and *pack* keywords that have the same
20
+ purpose as gcc's *aligned* and *packed* struct attributes.
21
+
22
+
23
+ Field alignment
24
+ ~~~~~~~~~~~~~~~
25
+
26
+ The *align* keyword can be used to specify an alignment that is greater
27
+ than the natural alignment of a field:
28
+
29
+ .. doctest::
30
+
31
+ >>> from xnd import *
32
+ >>> s = "(uint8, uint64 |align=32|, uint64)"
33
+ >>> x = xnd.empty(s)
34
+ >>> x.align
35
+ 32
36
+ >>> x.type.datasize
37
+ 64
38
+
39
+
40
+
41
+ Field packing
42
+ ~~~~~~~~~~~~~
43
+
44
+ The *pack* keyword can be used to specify an alignment that is smaller
45
+ than the natural alignment of a field:
46
+
47
+ .. doctest::
48
+
49
+ >>> s = "(uint8, uint64 |pack=2|, uint64)"
50
+ >>> x = xnd.empty(s)
51
+ >>> x.align
52
+ 8
53
+ >>> x.type.datasize
54
+ 24
55
+
56
+
57
+
58
+ Struct packing
59
+ ~~~~~~~~~~~~~~
60
+
61
+ The *pack* and *align* keywords can be applied to the entire struct:
62
+
63
+ .. doctest::
64
+
65
+ >>> s = "(uint8, uint64, uint64, pack=1)"
66
+ >>> x = xnd.empty(s)
67
+ >>> x.align
68
+ 1
69
+ >>> x.type.datasize
70
+ 17
71
+
72
+
73
+ Individual field and struct directives are mutually exclusive:
74
+
75
+ .. doctest::
76
+
77
+ >>> s = "2 * (uint8 |align=16|, uint64, pack=1)"
78
+ >>> x = xnd.empty(s)
79
+ Traceback (most recent call last):
80
+ File "<stdin>", line 1, in <module>
81
+ ValueError: cannot have 'pack' tuple attribute and field attributes
82
+
83
+
84
+ Array alignment
85
+ ~~~~~~~~~~~~~~~
86
+
87
+ An array has the same alignment as its elements:
88
+
89
+ .. doctest::
90
+
91
+ >>> s = "2 * (uint8, uint64, pack=1)"
92
+ >>> x = xnd.empty(s)
93
+ >>> x.align
94
+ 1
95
+ >>> x.type.datasize
96
+ 18
@@ -0,0 +1,42 @@
1
+ .. meta::
2
+ :robots: index,follow
3
+ :description: xnd container
4
+ :keywords: xnd, buffer protocol
5
+
6
+ .. sectionauthor:: Stefan Krah <skrah at bytereef.org>
7
+
8
+
9
+ Buffer protocol
10
+ ===============
11
+
12
+ xnd supports importing PEP-3118 buffers.
13
+
14
+
15
+ From NumPy
16
+ ----------
17
+
18
+ Import a simple ndarray:
19
+
20
+ .. doctest::
21
+
22
+ >>> import numpy as np
23
+ >>> from xnd import *
24
+ >>> x = np.array([[[0,1,2], [3,4,5]], [[6,7,8], [9,10,11]]])
25
+ >>> y = xnd.from_buffer(x)
26
+ >>> y.type
27
+ ndt("2 * 2 * 3 * int64")
28
+ >>> y.value
29
+ [[[0, 1, 2], [3, 4, 5]], [[6, 7, 8], [9, 10, 11]]]
30
+
31
+
32
+ Import an ndarray with a struct dtype:
33
+
34
+ .. doctest::
35
+
36
+ >>> x = np.array([(1000, 400.25, 'abc'), (-23, -1e10, 'cba')],
37
+ ... dtype=[('x', '<i4'), ('y', '>f4'), ('z', 'S3')])
38
+ >>> y = xnd.from_buffer(x)
39
+ >>> y.type
40
+ ndt("2 * {x : int32, y : >float32, z : fixed_bytes(size=3)}")
41
+ >>> y.value
42
+ [{'x': 1000, 'y': 400.25, 'z': b'abc'}, {'x': -23, 'y': -10000000000.0, 'z': b'cba'}]
@@ -0,0 +1,30 @@
1
+ .. meta::
2
+ :robots: index, follow
3
+ :description: xnd documentation
4
+ :keywords: memory blocks, unboxed values, array computing, Python
5
+
6
+ .. sectionauthor:: Stefan Krah <skrah at bytereef.org>
7
+
8
+
9
+ xnd
10
+ ---
11
+
12
+ The xnd module implements a container type that maps most Python values
13
+ relevant for scientific computing directly to typed memory.
14
+
15
+ Whenever possible, a single, pointer-free memory block is used.
16
+
17
+ xnd supports ragged arrays, categorical types, indexing, slicing, aligned memory blocks and type inference.
18
+
19
+ Operations like indexing and slicing return zero-copy typed views on the data.
20
+
21
+ Importing PEP-3118 buffers is supported.
22
+
23
+
24
+ .. toctree::
25
+ :maxdepth: 1
26
+
27
+ types.rst
28
+ align-pack.rst
29
+ buffer-protocol.rst
30
+ quickstart.rst
@@ -0,0 +1,62 @@
1
+ .. meta::
2
+ :robots: index,follow
3
+ :description: xnd quickstart
4
+ :keywords: xnd, install
5
+
6
+ .. sectionauthor:: Stefan Krah <skrah at bytereef.org>
7
+
8
+
9
+ Quick Start
10
+ ===========
11
+
12
+ Prerequisites
13
+ ~~~~~~~~~~~~~
14
+
15
+ Python2 is not supported. If not already present, install the Python3
16
+ development packages:
17
+
18
+ .. code-block:: sh
19
+
20
+ # Debian, Ubuntu:
21
+ sudo apt-get install gcc make
22
+ sudo apt-get install python3-dev
23
+
24
+ # Fedora, RedHat:
25
+ sudo yum install gcc make
26
+ sudo yum install python3-devel
27
+
28
+ # openSUSE:
29
+ sudo zypper install gcc make
30
+ sudo zypper install python3-devel
31
+
32
+ # BSD:
33
+ # You know what to do.
34
+
35
+ # Mac OS X:
36
+ # Install Xcode and Python 3 headers.
37
+
38
+
39
+ Install
40
+ ~~~~~~~
41
+
42
+ If `pip <http://pypi.python.org/pypi/pip>`_ is present on the system, installation
43
+ should be as easy as:
44
+
45
+ .. code-block:: sh
46
+
47
+ pip install xnd
48
+
49
+
50
+ Otherwise:
51
+
52
+ .. code-block:: sh
53
+
54
+ tar xvzf xnd-0.2.0b1.tar.gz
55
+ cd xnd-0.2.0b1
56
+ python3 setup.py install
57
+
58
+
59
+ Windows
60
+ ~~~~~~~
61
+
62
+ Refer to the instructions in the *vcbuild* directory in the source distribution.
@@ -0,0 +1,674 @@
1
+ .. meta::
2
+ :robots: index,follow
3
+ :description: xnd container
4
+ :keywords: xnd, types, examples
5
+
6
+ .. sectionauthor:: Stefan Krah <skrah at bytereef.org>
7
+
8
+
9
+ Types
10
+ =====
11
+
12
+ The xnd object is a container that maps a wide range of Python values directly
13
+ to memory. xnd unpacks complex types of arbitrary nesting depth to a single
14
+ memory block.
15
+
16
+ Pointers only occur in explicit pointer types like *Ref* (reference), *Bytes*
17
+ and *String*, but not in the general case.
18
+
19
+
20
+ Type inference
21
+ --------------
22
+
23
+ If no explicit type is given, xnd supports type inference by assuming
24
+ types for the most common Python values.
25
+
26
+
27
+ Fixed arrays
28
+ ~~~~~~~~~~~~
29
+
30
+ .. doctest::
31
+
32
+ >>> from xnd import *
33
+ >>> x = xnd([[0, 1, 2], [3, 4, 5]])
34
+ >>> x
35
+ xnd([[0, 1, 2], [3, 4, 5]], type='2 * 3 * int64')
36
+
37
+
38
+ As expected, lists are mapped to ndarrays and integers to int64. Indexing and
39
+ slicing work the usual way. For performance reasons these operations return
40
+ zero-copy views whenever possible:
41
+
42
+ .. doctest::
43
+
44
+ >>> x[0][1] # Indexing returns views, even for scalars.
45
+ xnd(1, type='int64')
46
+ >>>
47
+ >>> y = x[:, ::-1] # Containers are returned as views.
48
+ >>> y
49
+ xnd([[2, 1, 0], [5, 4, 3]], type='2 * 3 * int64')
50
+
51
+
52
+ Subarrays are views and properly typed:
53
+
54
+ .. doctest::
55
+
56
+ >>> x[1]
57
+ xnd([3, 4, 5], type='3 * int64')
58
+
59
+
60
+ The representation of large values is abbreviated:
61
+
62
+ .. doctest::
63
+
64
+ >>> x = xnd(10 * [200 * [1]])
65
+ >>> x
66
+ xnd([[1, 1, 1, 1, 1, 1, 1, 1, 1, ...],
67
+ [1, 1, 1, 1, 1, 1, 1, 1, 1, ...],
68
+ [1, 1, 1, 1, 1, 1, 1, 1, 1, ...],
69
+ [1, 1, 1, 1, 1, 1, 1, 1, 1, ...],
70
+ [1, 1, 1, 1, 1, 1, 1, 1, 1, ...],
71
+ [1, 1, 1, 1, 1, 1, 1, 1, 1, ...],
72
+ [1, 1, 1, 1, 1, 1, 1, 1, 1, ...],
73
+ [1, 1, 1, 1, 1, 1, 1, 1, 1, ...],
74
+ [1, 1, 1, 1, 1, 1, 1, 1, 1, ...],
75
+ ...],
76
+ type='10 * 200 * int64')
77
+
78
+
79
+ Values can be accessed in full using the *value* property:
80
+
81
+ .. doctest::
82
+
83
+ >>> x = xnd(11 * [1])
84
+ >>> x
85
+ xnd([1, 1, 1, 1, 1, 1, 1, 1, 1, ...], type='11 * int64')
86
+ >>> x.value
87
+ [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
88
+
89
+
90
+ Types can be accessed using the *type* property:
91
+
92
+ .. doctest::
93
+
94
+ >>> x = xnd(11 * [1])
95
+ >>> x.type
96
+ ndt("11 * int64")
97
+
98
+
99
+ Ragged arrays
100
+ ~~~~~~~~~~~~~
101
+
102
+ Ragged arrays are compatible with the Arrow list representation. The data
103
+ is pointer-free, addressing the elements works by having one offset array
104
+ per dimension.
105
+
106
+ .. doctest::
107
+
108
+ >>> xnd([[0.1j], [3+2j, 4+5j, 10j]])
109
+ xnd([[0.1j], [(3+2j), (4+5j), 10j]], type='var * var * complex128')
110
+
111
+
112
+ Indexing and slicing work as usual, returning properly typed views or
113
+ values in the case of scalars:
114
+
115
+ .. doctest::
116
+
117
+ >>> x = xnd([[0.1j], [3+2j, 4+5j, 10j]])
118
+ >>> x[1, 2]
119
+ xnd(10j, type='complex128')
120
+
121
+ >>> x[1]
122
+ xnd([(3+2j), (4+5j), 10j], type='var * complex128')
123
+
124
+
125
+ Eliminating dimensions through mixed slicing and indexing is not supported
126
+ because it would require copying and adjusting potentially huge offset arrays:
127
+
128
+ .. doctest::
129
+
130
+ >>> y = x[:, 1]
131
+ Traceback (most recent call last):
132
+ File "<stdin>", line 1, in <module>
133
+ IndexError: mixed indexing and slicing is not supported for var dimensions
134
+
135
+
136
+ Records (structs)
137
+ ~~~~~~~~~~~~~~~~~
138
+
139
+ From Python 3.6 on, dicts retain their order, so they can be used directly
140
+ for initializing C structs.
141
+
142
+ .. doctest::
143
+
144
+ >>> xnd({'a': 'foo', 'b': 10.2})
145
+ xnd({'a': 'foo', 'b': 10.2}, type='{a : string, b : float64}')
146
+
147
+
148
+ Tuples
149
+ ~~~~~~
150
+
151
+ Python tuples are directly translated to the libndtypes tuple type:
152
+
153
+ .. doctest::
154
+
155
+ >>> xnd(('foo', b'bar', [None, 10.0, 20.0]))
156
+ xnd(('foo', b'bar', [None, 10.0, 20.0]), type='(string, bytes, 3 * ?float64)')
157
+
158
+
159
+ Nested arrays in structs
160
+ ~~~~~~~~~~~~~~~~~~~~~~~~
161
+
162
+ xnd seamlessly supports nested values of arbitrary depth:
163
+
164
+ .. doctest::
165
+
166
+ >>> lst = [{'name': 'John', 'internet_points': [1, 2, 3]},
167
+ ... {'name': 'Jane', 'internet_points': [4, 5, 6]}]
168
+ >>> xnd(lst)
169
+ xnd([{'name': 'John', 'internet_points': [1, 2, 3]}, {'name': 'Jane', 'internet_points': [4, 5, 6]}],
170
+ type='2 * {name : string, internet_points : 3 * int64}')
171
+
172
+
173
+ Optional data (missing values)
174
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
175
+
176
+ Optional data is currently specified using *None*. It is under debate if
177
+ a separate *NA* singleton object would be more suitable.
178
+
179
+ .. doctest::
180
+
181
+ >>> lst = [0, 1, None, 2, 3, None, 5, 10]
182
+ >>> xnd(lst)
183
+ xnd([0, 1, None, 2, 3, None, 5, 10], type='8 * ?int64')
184
+
185
+
186
+ Categorical data
187
+ ~~~~~~~~~~~~~~~~
188
+
189
+ Type inference would be ambiguous, so it cannot work directly. xnd supports
190
+ the *levels* argument that is internally translated to the type.
191
+
192
+ .. doctest::
193
+
194
+ >>> levels = ['January', 'August', 'December', None]
195
+ >>> x = xnd(['January', 'January', None, 'December', 'August', 'December', 'December'], levels=levels)
196
+ >>> x.value
197
+ ['January', 'January', None, 'December', 'August', 'December', 'December']
198
+ >>> x.type
199
+ ndt("7 * categorical('January', 'August', 'December', NA)")
200
+
201
+
202
+ The above is equivalent to specifying the type directly:
203
+
204
+ .. doctest::
205
+
206
+ >>> from ndtypes import *
207
+ >>> t = ndt("7 * categorical('January', 'August', 'December', NA)")
208
+ >>> x = xnd(['January', 'January', None, 'December', 'August', 'December', 'December'], type=t)
209
+ >>> x.value
210
+ ['January', 'January', None, 'December', 'August', 'December', 'December']
211
+ >>> x.type
212
+ ndt("7 * categorical('January', 'August', 'December', NA)")
213
+
214
+
215
+ Explicit types
216
+ --------------
217
+
218
+ While type inference is well-defined, it necessarily makes assumptions about
219
+ the programmer's intent.
220
+
221
+ There are two cases where types should be given:
222
+
223
+
224
+ Different types are intended
225
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
226
+
227
+ .. doctest::
228
+
229
+ >>> xnd([[0,1,2], [3,4,5]], type="2 * 3 * uint8")
230
+ xnd([[0, 1, 2], [3, 4, 5]], type='2 * 3 * uint8')
231
+
232
+ Here, type inference would deduce :c:macro:`int64`, so :c:macro:`uint8` needs
233
+ to be passed explicitly.
234
+
235
+
236
+ Performance
237
+ ~~~~~~~~~~~
238
+
239
+ For large arrays, explicit types are significantly faster. Type inference
240
+ supports arbitrary nesting depth, is complex and still implemented in pure
241
+ Python. Compare:
242
+
243
+ .. doctest::
244
+
245
+ >>> lst = [1] * 1000000
246
+ >>> x = xnd(lst) # inference
247
+ >>>
248
+ >>> x = xnd(lst, type='1000000 * int64') # explicit
249
+
250
+
251
+ All supported types
252
+ -------------------
253
+
254
+ Fixed arrays
255
+ ~~~~~~~~~~~~
256
+
257
+ Fixed arrays are similar to NumPy's ndarray. One difference is that internally
258
+ xnd uses steps instead of strides. One step is the number of indices required
259
+ to move the linear index from one dimension element to the next.
260
+
261
+ This facilitates optional data, whose bitmaps need to be addressed by the
262
+ linear index. The equation *stride = step * itemsize* always holds.
263
+
264
+
265
+ .. doctest::
266
+
267
+ >>> xnd([[[1,2], [None, 3]], [[4, None], [5, 6]]])
268
+ xnd([[[1, 2], [None, 3]], [[4, None], [5, 6]]], type='2 * 2 * 2 * ?int64')
269
+
270
+ This is a fixed array with optional data.
271
+
272
+
273
+ .. doctest::
274
+
275
+ >>> xnd([(1,2.0,3j), (4,5.0,6j)])
276
+ xnd([(1, 2.0, 3j), (4, 5.0, 6j)], type='2 * (int64, float64, complex128)')
277
+
278
+ An array with tuple elements.
279
+
280
+
281
+ Fortran order
282
+ ~~~~~~~~~~~~~
283
+
284
+ Fortran order is specified by prefixing the dimensions with an exclamation mark:
285
+
286
+ .. doctest::
287
+
288
+ >>> lst = [[1, 2, 3], [4, 5, 6]]
289
+ >>> x = xnd(lst, type='!2 * 3 * uint16')
290
+ >>>
291
+ >>> x.type.shape
292
+ (2, 3)
293
+ >>> x.type.strides
294
+ (2, 4)
295
+
296
+
297
+ Alternatively, steps can be passed as arguments to the fixed dimension type:
298
+
299
+ .. doctest::
300
+
301
+ >>> from ndtypes import *
302
+ >>> lst = [[1, 2, 3], [4, 5, 6]]
303
+ >>> t = ndt("fixed(shape=2, step=1) * fixed(shape=3, step=2) * uint16")
304
+ >>> x = xnd(lst, type=t)
305
+ >>> x.type.shape
306
+ (2, 3)
307
+ >>> x.type.strides
308
+ (2, 4)
309
+
310
+
311
+ Ragged arrays
312
+ ~~~~~~~~~~~~~
313
+
314
+ Ragged arrays with explicit types are easiest to construct using the *dtype*
315
+ argument to the xnd constructor.
316
+
317
+ .. doctest::
318
+
319
+ >>> lst = [[0], [1, 2], [3, 4, 5]]
320
+ >>> xnd(lst, dtype="int32")
321
+ xnd([[0], [1, 2], [3, 4, 5]], type='var * var * int32')
322
+
323
+
324
+ Alternatively, offsets can be passed as arguments to the var dimension type:
325
+
326
+ .. doctest::
327
+
328
+ >>> from ndtypes import ndt
329
+ >>> t = ndt("var(offsets=[0,3]) * var(offsets=[0,1,3,6]) * int32")
330
+ >>> xnd(lst, type=t)
331
+ xnd([[0], [1, 2], [3, 4, 5]], type='var * var * int32')
332
+
333
+
334
+ Tuples
335
+ ~~~~~~
336
+
337
+ In memory, tuples are the same as C structs.
338
+
339
+ .. doctest::
340
+
341
+ >>> xnd(("foo", 1.0))
342
+ xnd(('foo', 1.0), type='(string, float64)')
343
+
344
+
345
+ Indexing works the same as for arrays:
346
+
347
+ .. doctest::
348
+
349
+ >>> x = xnd(("foo", 1.0))
350
+ >>> x[0]
351
+ xnd('foo', type='string')
352
+
353
+
354
+ Nested tuples are more general than ragged arrays: a) they can hold different
355
+ data types, and b) the trees they represent may be unbalanced.
356
+
357
+ They do not allow slicing though and are probably less efficient.
358
+
359
+ This is an example of an unbalanced tree that cannot be represented as a
360
+ ragged array:
361
+
362
+ .. doctest::
363
+
364
+ >>> unbalanced_tree = (((1.0, 2.0), (3.0)), 4.0, ((5.0, 6.0, 7.0), ()))
365
+ >>> x = xnd(unbalanced_tree)
366
+ >>> x.value
367
+ (((1.0, 2.0), 3.0), 4.0, ((5.0, 6.0, 7.0), ()))
368
+ >>> x.type
369
+ ndt("(((float64, float64), float64), float64, ((float64, float64, float64), ()))")
370
+ >>>
371
+ >>> x[0]
372
+ xnd(((1.0, 2.0), 3.0), type='((float64, float64), float64)')
373
+ >>> x[0][0]
374
+ xnd((1.0, 2.0), type='(float64, float64)')
375
+
376
+
377
+ Note that the data in the above tree example is packed into a single contiguous
378
+ memory block.
379
+
380
+
381
+ Records
382
+ ~~~~~~~
383
+
384
+ In memory, records are C structs. The field names are only stored in the type.
385
+
386
+ The following examples use Python-3.6, which keeps the dict initialization
387
+ order.
388
+
389
+ .. doctest::
390
+
391
+ >>> x = xnd({'a': b'123', 'b': {'x': 1.2, 'y': 100+3j}})
392
+ >>> x.value
393
+ {'a': b'123', 'b': {'x': 1.2, 'y': (100+3j)}}
394
+ >>> x.type
395
+ ndt("{a : bytes, b : {x : float64, y : complex128}}")
396
+
397
+
398
+ Indexing works the same as for arrays. Additionally, fields can be indexed
399
+ by name:
400
+
401
+ .. doctest::
402
+
403
+ >>> x[0]
404
+ xnd(b'123', type='bytes')
405
+ >>> x['a']
406
+ xnd(b'123', type='bytes')
407
+ >>> x['b']
408
+ xnd({'x': 1.2, 'y': (100+3j)}, type='{x : float64, y : complex128}')
409
+
410
+
411
+ The nesting depth is arbitrary. In the following example, the data -- except
412
+ for strings, which are pointers -- is packed into a single contiguous memory
413
+ block:
414
+
415
+ .. code-block:: py
416
+
417
+ >>> from pprint import pprint
418
+ >>> item = {
419
+ ... "id": 1001,
420
+ ... "name": "cyclotron",
421
+ ... "price": 5998321.99,
422
+ ... "tags": ["connoisseur", "luxury"],
423
+ ... "stock": {
424
+ ... "warehouse": 722,
425
+ ... "retail": 20
426
+ ... }
427
+ ... }
428
+ >>> x = xnd(item)
429
+ >>>
430
+ >>> pprint(x.value)
431
+ {'id': 1001,
432
+ 'name': 'cyclotron',
433
+ 'price': 5998321.99,
434
+ 'stock': {'retail': 20, 'warehouse': 722},
435
+ 'tags': ['connoisseur', 'luxury']}
436
+ >>>
437
+ >>> x.type.pprint()
438
+ {
439
+ id : int64,
440
+ name : string,
441
+ price : float64,
442
+ tags : 2 * string,
443
+ stock : {
444
+ warehouse : int64,
445
+ retail : int64
446
+ }
447
+ }
448
+
449
+
450
+ Strings can be embedded into the array by specifying the fixed string type.
451
+ In this case, the memory block is pointer-free.
452
+
453
+ .. code-block:: py
454
+
455
+ >>> from ndtypes import ndt
456
+ >>>
457
+ >>> t = """
458
+ ... { id : int64,
459
+ ... name : fixed_string(30),
460
+ ... price : float64,
461
+ ... tags : 2 * fixed_string(30),
462
+ ... stock : {warehouse : int64, retail : int64}
463
+ ... }
464
+ ... """
465
+ >>>
466
+ >>> x = xnd(item, type=t)
467
+ >>> x.type.pprint()
468
+ {
469
+ id : int64,
470
+ name : fixed_string(30),
471
+ price : float64,
472
+ tags : 2 * fixed_string(30),
473
+ stock : {
474
+ warehouse : int64,
475
+ retail : int64
476
+ }
477
+ }
478
+
479
+
480
+ Record of arrays
481
+ ~~~~~~~~~~~~~~~~
482
+
483
+ Often it is more memory efficient to store an array of records as a record of
484
+ arrays. This example with columnar data is from the Arrow homepage:
485
+
486
+ .. doctest::
487
+
488
+ >>> data = {'session_id': [1331247700, 1331247702, 1331247709, 1331247799],
489
+ ... 'timestamp': [1515529735.4895875, 1515529746.2128427, 1515529756.4485607, 1515529766.2181058],
490
+ ... 'source_ip': ['8.8.8.100', '100.2.0.11', '99.101.22.222', '12.100.111.200']}
491
+ >>> x = xnd(data)
492
+ >>> x.type
493
+ ndt("{session_id : 4 * int64, timestamp : 4 * float64, source_ip : 4 * string}")
494
+
495
+
496
+
497
+ References
498
+ ~~~~~~~~~~
499
+
500
+ References are transparent pointers to new memory blocks (meaning a new
501
+ data pointer, not a whole new xnd buffer).
502
+
503
+ For example, this is an array of pointers to arrays:
504
+
505
+ .. doctest::
506
+
507
+ >>> t = ndt("3 * ref(4 * uint64)")
508
+ >>> lst = [[0,1,2,3], [4,5,6,7], [8,9,10,11]]
509
+ >>> xnd(lst, type=t)
510
+ xnd([[0, 1, 2, 3], [4, 5, 6, 7], [8, 9, 10, 11]], type='3 * ref(4 * uint64)')
511
+
512
+ The user sees no difference to a regular 3 by 4 array, but internally
513
+ the outer dimension consists of three pointers to the inner arrays.
514
+
515
+ For memory blocks generated by xnd itself the feature is not so useful --
516
+ after all, it is usually better to have a single memory block than one
517
+ with additional pointers.
518
+
519
+
520
+ However, suppose that in the above columnar data example another application
521
+ represents the arrays inside the record with pointers. Using the *ref* type,
522
+ data structures borrowed from such an application can be properly typed:
523
+
524
+ .. doctest::
525
+
526
+ >>> t = ndt("{session_id : &4 * int64, timestamp : &4 * float64, source_ip : &4 * string}")
527
+ >>> x = xnd(data, type=t)
528
+ >>> x.type
529
+ ndt("{session_id : ref(4 * int64), timestamp : ref(4 * float64), source_ip : ref(4 * string)}")
530
+
531
+ The ampersand is the shorthand for "ref".
532
+
533
+
534
+
535
+ Constructors
536
+ ~~~~~~~~~~~~
537
+
538
+ Constructors are xnd's way of creating distinct named types. The constructor
539
+ argument is a regular type.
540
+
541
+ Constructors open up a new dtype, so named arrays can be the dtype of
542
+ other arrays. Type inference currently isn't aware of constructors,
543
+ so types have to be provided.
544
+
545
+ .. doctest::
546
+
547
+ >>> t = ndt("3 * SomeMatrix(2 * 2 * float32)")
548
+ >>> lst = [[[1,2], [3,4]], [[5,6], [7,8]], [[9,10], [11,12]]]
549
+ >>> x = xnd(lst, type=t)
550
+ >>> x
551
+ xnd([[[1.0, 2.0], [3.0, 4.0]], [[5.0, 6.0], [7.0, 8.0]], [[9.0, 10.0], [11.0, 12.0]]],
552
+ type='3 * SomeMatrix(2 * 2 * float32)')
553
+ >>> x[0]
554
+ xnd([[1.0, 2.0], [3.0, 4.0]], type='SomeMatrix(2 * 2 * float32)')
555
+
556
+
557
+ Categorical
558
+ ~~~~~~~~~~~
559
+
560
+ Categorical types contain values. The data stored in xnd buffers are indices
561
+ (:c:macro:`int64`) into the type's categories.
562
+
563
+ .. doctest::
564
+
565
+ >>> t = ndt("categorical('a', 'b', 'c', NA)")
566
+ >>> data = ['a', 'a', 'b', 'a', 'a', 'a', 'foo', 'c']
567
+ >>> x = xnd(data, dtype=t)
568
+ >>> x.value
569
+ ['a', 'a', 'b', 'a', 'a', 'a', None, 'c']
570
+
571
+
572
+ Fixed String
573
+ ~~~~~~~~~~~~
574
+
575
+ Fixed strings are embedded into arrays. Supported encodings are 'ascii',
576
+ 'utf8', 'utf16' and 'utf32'. The string size argument denotes the number
577
+ of code points rather than bytes.
578
+
579
+ .. doctest::
580
+
581
+ >>> t = ndt("10 * fixed_string(3, 'utf32')")
582
+ >>> x = xnd.empty(t)
583
+ >>> x.value
584
+ ['', '', '', '', '', '', '', '', '', '']
585
+ >>> x[3] = "\U000003B1\U000003B2\U000003B3"
586
+ >>> x.value
587
+ ['', '', '', 'αβγ', '', '', '', '', '', '']
588
+
589
+
590
+ Fixed Bytes
591
+ ~~~~~~~~~~~
592
+
593
+ Fixed bytes are embedded into arrays.
594
+
595
+ .. doctest::
596
+
597
+ >>> t = ndt("3 * fixed_bytes(size=3)")
598
+ >>> x = xnd.empty(t)
599
+ >>> x[2] = b'123'
600
+ >>> x.value
601
+ [b'\x00\x00\x00', b'\x00\x00\x00', b'123']
602
+ >>> x.align
603
+ 1
604
+
605
+ Alignment can be requested with the requirement that size is a multiple of
606
+ alignment:
607
+
608
+ .. doctest::
609
+
610
+ >>> t = ndt("3 * fixed_bytes(size=32, align=16)")
611
+ >>> x = xnd.empty(t)
612
+ >>> x.align
613
+ 16
614
+
615
+
616
+ String
617
+ ~~~~~~
618
+
619
+ Strings are pointers to :c:macro:`NUL`-terminated UTF-8 strings.
620
+
621
+ .. doctest::
622
+
623
+ >>> x = xnd.empty("10 * string")
624
+ >>> x.value
625
+ ['', '', '', '', '', '', '', '', '', '']
626
+ >>> x[0] = "abc"
627
+ >>> x.value
628
+ ['abc', '', '', '', '', '', '', '', '', '']
629
+
630
+
631
+
632
+ Bytes
633
+ ~~~~~
634
+
635
+ Internally, bytes are structs with a size field and a pointer to the data.
636
+
637
+ .. doctest::
638
+
639
+ >>> xnd([b'123', b'45678'])
640
+ xnd([b'123', b'45678'], type='2 * bytes')
641
+
642
+
643
+ The bytes constructor takes an optional *align* argument that specifies the
644
+ alignment of the allocated data:
645
+
646
+ .. doctest::
647
+
648
+ >>> x = xnd([b'abc', b'123'], type="2 * bytes(align=64)")
649
+ >>> x.value
650
+ [b'abc', b'123']
651
+ >>> x.align
652
+ 8
653
+
654
+ Note that *x.align* is the alignment of the array itself. The bytes data that
655
+ the embedded pointers refer to is aligned at *64*.
656
+
657
+
658
+ Primitive types
659
+ ~~~~~~~~~~~~~~~
660
+
661
+ As a short example, here is a tuple that contains all primitive types:
662
+
663
+ .. doctest::
664
+
665
+ >>> s = """
666
+ ... (bool,
667
+ ... int8, int16, int32, int64,
668
+ ... uint8, uint16, uint32, uint64,
669
+ ... float16, float32, float64,
670
+ ... complex32, complex64, complex128)
671
+ ... """
672
+ >>> x = xnd.empty(s)
673
+ >>> x.value
674
+ (False, 0, 0, 0, 0, 0, 0, 0, 0, 0.0, 0.0, 0.0, 0j, 0j, 0j)