quasardb 3.14.2.dev1-cp311-cp311-win32.whl → 3.14.2.dev3-cp311-cp311-win32.whl
- quasardb/CMakeLists.txt +19 -12
- quasardb/INSTALL.vcxproj +4 -0
- quasardb/__init__.py +33 -4
- quasardb/cluster.cpp +14 -1
- quasardb/cluster.hpp +114 -72
- quasardb/concepts.hpp +56 -12
- quasardb/continuous.cpp +84 -34
- quasardb/continuous.hpp +10 -7
- quasardb/convert/array.hpp +23 -6
- quasardb/convert/value.hpp +78 -7
- quasardb/date/ALL_BUILD.vcxproj +4 -4
- quasardb/date/CMakeFiles/Export/df49adab93b9e0c10c64f72458b31971/dateTargets.cmake +12 -12
- quasardb/date/CMakeFiles/generate.stamp.depend +4 -4
- quasardb/date/INSTALL.vcxproj +4 -0
- quasardb/date/dateConfigVersion.cmake +0 -5
- quasardb/date/dateTargets.cmake +3 -7
- quasardb/detail/invoke.hpp +0 -0
- quasardb/detail/retry.cpp +30 -0
- quasardb/detail/retry.hpp +147 -0
- quasardb/detail/sleep.hpp +53 -0
- quasardb/{writer.cpp → detail/writer.cpp} +68 -162
- quasardb/detail/writer.hpp +550 -0
- quasardb/error.hpp +76 -1
- quasardb/masked_array.hpp +9 -2
- quasardb/module.cpp +20 -4
- quasardb/node.hpp +17 -8
- quasardb/numpy/__init__.py +58 -10
- quasardb/object_tracker.hpp +2 -3
- quasardb/options.hpp +32 -3
- quasardb/pandas/__init__.py +59 -102
- quasardb/properties.cpp +41 -0
- quasardb/properties.hpp +85 -0
- quasardb/pybind11/ALL_BUILD.vcxproj +4 -4
- quasardb/pybind11/CMakeFiles/generate.stamp.depend +14 -14
- quasardb/pybind11/INSTALL.vcxproj +4 -0
- quasardb/qdb_api.dll +0 -0
- quasardb/quasardb.cp311-win32.pyd +0 -0
- quasardb/range-v3/ALL_BUILD.vcxproj +4 -4
- quasardb/range-v3/CMakeFiles/Export/d94ef200eca10a819b5858b33e808f5b/range-v3-targets.cmake +12 -12
- quasardb/range-v3/CMakeFiles/generate.stamp.depend +11 -11
- quasardb/range-v3/INSTALL.vcxproj +4 -0
- quasardb/range-v3/cmake_install.cmake +36 -0
- quasardb/range-v3/range-v3-config-version.cmake +0 -5
- quasardb/range-v3/range-v3-config.cmake +3 -7
- quasardb/range-v3/range.v3.headers.vcxproj +4 -4
- quasardb/reader.cpp +282 -0
- quasardb/reader.hpp +256 -0
- quasardb/table.cpp +4 -36
- quasardb/table.hpp +69 -28
- quasardb/traits.hpp +23 -0
- quasardb/writer.hpp +245 -287
- {quasardb-3.14.2.dev1.dist-info → quasardb-3.14.2.dev3.dist-info}/METADATA +7 -7
- {quasardb-3.14.2.dev1.dist-info → quasardb-3.14.2.dev3.dist-info}/RECORD +56 -50
- {quasardb-3.14.2.dev1.dist-info → quasardb-3.14.2.dev3.dist-info}/WHEEL +1 -1
- quasardb/reader/ts_row.hpp +0 -281
- quasardb/reader/ts_value.hpp +0 -245
- quasardb/table_reader.hpp +0 -220
- {quasardb-3.14.2.dev1.dist-info → quasardb-3.14.2.dev3.dist-info}/LICENSE.md +0 -0
- {quasardb-3.14.2.dev1.dist-info → quasardb-3.14.2.dev3.dist-info}/top_level.txt +0 -0
quasardb/reader.cpp
ADDED
@@ -0,0 +1,282 @@
+#include "reader.hpp"
+#include "error.hpp"
+#include "table.hpp"
+#include "traits.hpp"
+#include "convert/array.hpp"
+#include "convert/value.hpp"
+#include "detail/qdb_resource.hpp"
+#include <range/v3/view/counted.hpp>
+
+namespace qdb
+{
+
+namespace detail
+{
+
+/* static */ py::dict reader_data::convert(qdb_bulk_reader_table_data_t const & data)
+{
+    py::dict ret{};
+
+    // typedef struct
+    // {
+    //     qdb_size_t row_count;
+    //     qdb_size_t column_count;
+    //     const qdb_timespec_t * timestamps;
+    //     const qdb_exp_batch_push_column_t * columns;
+    // } qdb_exp_batch_push_table_data_t;
+
+    // Convert the timestamp index, which should never contain null values
+    // and thus is *not* a masked array.
+    auto timestamps = ranges::views::counted(data.timestamps, data.row_count);
+    auto columns    = ranges::views::counted(data.columns, data.column_count);
+
+    py::array idx = convert::array<qdb_timespec_t, traits::datetime64_ns_dtype>(timestamps);
+    qdb::masked_array idx_ = qdb::masked_array::masked_none(idx);
+
+    ret[py::str("$timestamp")] = idx;
+
+    for (qdb_exp_batch_push_column_t const & column : columns)
+    {
+        // typedef struct // NOLINT(modernize-use-using)
+        // {
+        //     char const * name;
+        //     qdb_ts_column_type_t data_type;
+        //     union
+        //     {
+        //         const qdb_timespec_t * timestamps;
+        //         const qdb_string_t * strings;
+        //         const qdb_blob_t * blobs;
+        //         const qdb_int_t * ints;
+        //         const double * doubles;
+        //     } data;
+        // } qdb_exp_batch_push_column_t;
+
+        py::str column_name{column.name};
+
+        qdb::masked_array xs;
+        switch (column.data_type)
+        {
+        case qdb_ts_column_int64:
+            xs = convert::masked_array<qdb_int_t, traits::int64_dtype>(
+                ranges::views::counted(column.data.ints, data.row_count));
+            break;
+        case qdb_ts_column_double:
+            xs = convert::masked_array<double, traits::float64_dtype>(
+                ranges::views::counted(column.data.doubles, data.row_count));
+            break;
+        case qdb_ts_column_string:
+            xs = convert::masked_array<qdb_string_t, traits::unicode_dtype>(
+                ranges::views::counted(column.data.strings, data.row_count));
+            break;
+        case qdb_ts_column_blob:
+            xs = convert::masked_array<qdb_blob_t, traits::pyobject_dtype>(
+                ranges::views::counted(column.data.blobs, data.row_count));
+            break;
+        case qdb_ts_column_timestamp:
+            xs = convert::masked_array<qdb_timespec_t, traits::datetime64_ns_dtype>(
+                ranges::views::counted(column.data.timestamps, data.row_count));
+            break;
+
+        case qdb_ts_column_symbol:
+            // This should not happen, as "symbol" is just an internal representation, and symbols
+            // are exposed to the user as strings. If this actually happens, it indicates either
+            // a bug in the bulk reader *or* a memory corruption.
+            throw qdb::not_implemented_exception(
+                "Internal error: invalid data type: symbol column type returned from bulk reader");
+
+        case qdb_ts_column_uninitialized:
+            throw qdb::not_implemented_exception(
+                "Internal error: invalid data type: unintialized column "
+                "type returned from bulk reader");
+        };
+
+        ret[std::move(column_name)] = std::move(xs.cast(py::return_value_policy::move));
+    }
+
+    return ret;
+}
+
+reader_iterator & reader_iterator::operator++()
+{
+    if (ptr_ == nullptr)
+    {
+        // This means this is either the first invocation, or we have
+        // previously exhausted all tables in the current "fetch" and
+        // should fetch next.
+        qdb_error_t err = qdb_bulk_reader_get_data(reader_, &ptr_, batch_size_);
+
+        if (err == qdb_e_iterator_end) [[unlikely]]
+        {
+            // We have reached the end -- reset all our internal state, and make us look
+            // like the "end" iterator.
+            handle_      = nullptr;
+            reader_      = nullptr;
+            batch_size_  = 0;
+            table_count_ = 0;
+            ptr_         = nullptr;
+            n_           = 0;
+        }
+        else
+        {
+            qdb::qdb_throw_if_error(*handle_, err);
+
+            // I like assertions
+            assert(handle_ != nullptr);
+            assert(reader_ != nullptr);
+            assert(table_count_ != 0);
+            assert(ptr_ != nullptr);
+
+            n_ = 0;
+        }
+    }
+    else
+    {
+        assert(ptr_ != nullptr);
+
+        if (++n_ == table_count_)
+        {
+            // We have exhausted our tables. What we will do is just "reset" our internal state
+            // to how it would be after the initial constructor, and recurse into this function,
+            // which should then just follow the regular flow above
+            qdb_release(*handle_, ptr_);
+
+            ptr_ = nullptr;
+            n_   = 0;
+
+            return this->operator++();
+        }
+
+        // At this point, we *must* have a valid state
+        assert(ptr_ != nullptr);
+        assert(n_ < table_count_);
+
+    } // if (ptr_ == nullptr)
+    return *this;
+};
+
+}; // namespace detail
+
+qdb::reader const & reader::enter()
+{
+    // Very small scope, because we don't need any of the allocated memory after this function is
+    // finished, so we will also release memory early.
+    qdb::object_tracker::scoped_repository object_tracker{};
+    qdb::object_tracker::scoped_capture capture{object_tracker};
+
+    std::vector<qdb_bulk_reader_table_t> tables{};
+    tables.reserve(table_names_.size());
+
+    //
+    // Convert columns if applicable
+    //
+    char const ** columns{nullptr};
+
+    // If column names were provided, set them. Otherwise, it defaults to "all columns".
+    if (column_names_.empty() == false)
+    {
+
+        // Note that this particular converter copies the string and it's tracked
+        // using the object tracker.
+        //
+        // Pre-allocate the data for the columns, make sure that the memory is tracked,
+        // so we don't have to worry about memory loss.
+        columns = object_tracker::alloc<char const *>(column_names_.size() * sizeof(char const *));
+
+        for (std::size_t i = 0; i < column_names_.size(); ++i)
+        {
+            // Because the scope of `column_names_` outlives this function / scope, we don't have
+            // to copy the string, but can just directly use the .c_str() and things will work out.
+            columns[i] = column_names_.at(i).c_str();
+        }
+    }
+
+    qdb_ts_range_t * ranges{nullptr};
+
+    if (ranges_.empty() == false)
+    {
+        // Pre-allocate the data for the columns, make sure that the memory is tracked,
+        // so we don't have to worry about memory loss.
+        ranges = object_tracker::alloc<qdb_ts_range_t>(ranges_.size() * sizeof(qdb_ts_range_t));
+
+        for (std::size_t i = 0; i < ranges_.size(); ++i)
+        {
+            // This convert::value does not allocate anything on the heap
+            ranges[i] = convert::value<py::tuple, qdb_ts_range_t>(ranges_.at(i));
+        }
+    }
+
+    // We either have columns and have the actual array set, *or* we do not have any customized
+    // columns at all.
+    // Same applies for ranges
+    assert((columns == nullptr) == (column_names_.empty() == true));
+    assert((ranges == nullptr) == (ranges_.empty() == true));
+
+    for (std::string const & table_name : table_names_)
+    {
+        tables.emplace_back(qdb_bulk_reader_table_t{
+            // because the scope of `table_name` outlives this function, we can just directly
+            // use .c_str() without any copies.
+            table_name.c_str(),   //
+            columns,              //
+            column_names_.size(), //
+            ranges,               //
+            ranges_.size()        //
+        });
+    }
+
+    qdb::qdb_throw_if_error(
+        *handle_, qdb_bulk_reader_fetch(*handle_, tables.data(), tables.size(), &reader_));
+
+    return *this;
+}
+
+void reader::close()
+{
+    // Even though that from the API it looks like value, qdb_reader_handle_t is actually a pointer
+    // itself that needs to be released. This static assert checks for that.
+    static_assert(std::is_pointer<decltype(reader_)>());
+
+    if (reader_ != nullptr)
+    {
+        logger_.debug("closing reader");
+        qdb_release(*handle_, reader_);
+        reader_ = nullptr;
+    }
+
+    assert(reader_ == nullptr);
+}
+
+void register_reader(py::module_ & m)
+{
+    namespace py = pybind11;
+
+    auto reader_c = py::class_<qdb::reader>{m, "Reader"};
+
+    // basic interface
+    reader_c
+        .def(py::init<                                            //
+                 qdb::handle_ptr,                                 //
+                 std::vector<std::string> const &,                //
+                 std::vector<std::string> const &,                //
+                 std::size_t,                                     //
+                 std::vector<py::tuple> const &>(),               //
+            py::arg("conn"),                                      //
+            py::arg("table_names"),                               //
+            py::kw_only(),                                        //
+            py::arg("column_names") = std::vector<std::string>{}, //
+            py::arg("batch_size") = std::size_t{0},               //
+            py::arg("ranges") = std::vector<py::tuple>{}          //
+            )                                                     //
+        //
+        .def("get_batch_size", &qdb::reader::get_batch_size) //
+        //
+        .def("__enter__", &qdb::reader::enter) //
+        .def("__exit__", &qdb::reader::exit)   //
+        .def( //
+            "__iter__", [](qdb::reader & r) { return py::make_iterator(r.begin(), r.end()); }, //
+            py::keep_alive<0, 1>());
+
+    //
+}
+
+} // namespace qdb
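Taken together, reader_data::convert turns every fetched batch into a plain Python dict — the "$timestamp" key holds a datetime64[ns] index and each column name maps to a numpy masked array — and register_reader exposes the class as an iterable context manager. Below is a minimal usage sketch of that shape; the quasardb.Reader(conn, ...) entry point, the connection wiring, and the table/column names are assumptions for illustration, since the Python-side wrapper (quasardb/__init__.py) is not shown here:

    import numpy as np
    import quasardb

    conn = quasardb.Cluster("qdb://127.0.0.1:2836")

    # Hypothetical wiring: mirrors the py::init signature above
    # (conn, table_names, *, column_names, batch_size, ranges).
    reader = quasardb.Reader(conn, ["stocks"], column_names=["open", "close"])

    with reader:                          # __enter__ -> reader::enter() -> qdb_bulk_reader_fetch
        for batch in reader:              # one dict per table per fetched batch
            ts = batch["$timestamp"]      # datetime64[ns] array, never masked
            opens = batch["open"]         # numpy masked array; the mask marks null values
            print(len(ts), float(np.ma.mean(opens)))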
quasardb/reader.hpp
ADDED
@@ -0,0 +1,256 @@
+/*
+ *
+ * Official Python API
+ *
+ * Copyright (c) 2009-2021, quasardb SAS. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *    * Redistributions of source code must retain the above copyright
+ *      notice, this list of conditions and the following disclaimer.
+ *    * Redistributions in binary form must reproduce the above copyright
+ *      notice, this list of conditions and the following disclaimer in the
+ *      documentation and/or other materials provided with the distribution.
+ *    * Neither the name of quasardb nor the names of its contributors may
+ *      be used to endorse or promote products derived from this software
+ *      without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY QUASARDB AND CONTRIBUTORS ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE REGENTS AND CONTRIBUTORS BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#pragma once
+
+#include "handle.hpp"
+#include "logger.hpp"
+#include "object_tracker.hpp"
+#include <qdb/ts.h>
+#include <unordered_map>
+#include <vector>
+
+namespace py = pybind11;
+
+namespace qdb
+{
+
+namespace detail
+{
+
+using int64_column     = std::vector<qdb_int_t>;
+using double_column    = std::vector<double>;
+using timestamp_column = std::vector<qdb_timespec_t>;
+using blob_column      = std::vector<qdb_blob_t>;
+using string_column    = std::vector<qdb_string_t>;
+
+class reader_data
+{
+public:
+    /**
+     * Utility function which converts table data into a vanilla dict. Currently this works well, as
+     * there isn't any additional data/state we need to keep track of --
+     */
+    static py::dict convert(qdb_bulk_reader_table_data_t const & data);
+};
+
+class reader_iterator
+{
+public:
+    // Default constructor, which represents the "end" of the range
+    reader_iterator() noexcept
+        : handle_{nullptr}
+        , reader_{nullptr}
+        , batch_size_{0}
+        , table_count_{0}
+        , ptr_{nullptr}
+        , n_{0}
+    {}
+
+    // Actual initialization
+    reader_iterator(
+        handle_ptr handle, qdb_reader_handle_t reader, std::size_t batch_size, std::size_t table_count)
+        : handle_{handle}
+        , reader_{reader}
+        , batch_size_{batch_size}
+        , table_count_{table_count}
+
+        , ptr_{nullptr}
+        , n_{0}
+    {
+        // Always immediately try to fetch the first batch.
+        this->operator++();
+    }
+
+    bool operator!=(reader_iterator const & rhs) const noexcept
+    {
+        return !(*this == rhs);
+    }
+
+    bool operator==(reader_iterator const & rhs) const noexcept
+    {
+        // This is just a sanity check: if our handle_ is null, it means basically
+        // the entire object has to be null, and this will basically represent the
+        // ".end()" iterator.
+
+        if (handle_ == nullptr)
+        {
+            assert(reader_ == nullptr);
+            assert(ptr_ == nullptr);
+        }
+        else
+        {
+            assert(reader_ != nullptr);
+            assert(ptr_ != nullptr);
+        }
+
+        // Optimization: we *only* compare the pointers, we don't actually compare
+        // the data itself. This saves a bazillion comparisons, and for the purpose
+        // of iterators, we really only care whether the current iterator is at the
+        // end.
+        return (handle_ == rhs.handle_ //
+                && reader_ == rhs.reader_ //
+                && batch_size_ == rhs.batch_size_ //
+                && table_count_ == rhs.table_count_ //
+                && ptr_ == rhs.ptr_ && n_ == rhs.n_);
+    }
+
+    reader_iterator & operator++();
+
+    py::dict operator*()
+    {
+        assert(ptr_ != nullptr);
+        assert(n_ < table_count_);
+
+        return reader_data::convert(ptr_[n_]);
+    }
+
+private:
+    qdb::handle_ptr handle_;
+    qdb_reader_handle_t reader_;
+
+    /**
+     * The amount of rows to fetch in one operation. This can span multiple tables.
+     */
+    std::size_t batch_size_;
+
+    /**
+     * `table_count_` enables us to manage how much far we can iterate `ptr_`.
+     */
+    std::size_t table_count_;
+    qdb_bulk_reader_table_data_t * ptr_;
+    std::size_t n_;
+};
+
+}; // namespace detail
+
+class reader
+{
+public:
+    using iterator = detail::reader_iterator;
+
+public:
+    /**
+     * Tables must always be a list of actual table objects. This ensures the lifetime
+     * of any metadata inside the tables (such as its name) will always exceed that
+     * of the reader, which simplifies things a lot.
+     */
+    reader(                                            //
+        qdb::handle_ptr handle,                        //
+        std::vector<std::string> const & table_names,  //
+        std::vector<std::string> const & column_names, //
+        std::size_t batch_size,                        //
+        std::vector<py::tuple> const & ranges)         //
+        : logger_("quasardb.reader")
+        , handle_{handle}
+        , reader_{nullptr}
+        , table_names_{table_names}
+        , column_names_{column_names}
+        , batch_size_{batch_size}
+        , ranges_{ranges}
+    {}
+
+    // prevent copy because of the table object, use a unique_ptr of the batch in cluster
+    // to return the object.
+    //
+    // we prevent these copies because that is almost never what you want, and it gives us
+    // more freedom in storing a lot of data inside this object.
+    reader(const reader &) = delete;
+    reader(reader &&) = delete;
+
+    ~reader()
+    {
+        close();
+    }
+
+    /**
+     * Convenience function for accessing the configured batch size. Returns 0 when everything should
+     * be read in a single batch.
+     */
+    constexpr inline std::size_t get_batch_size() const noexcept
+    {
+        return batch_size_;
+    }
+
+    /**
+     * Opens the actual reader; this will initiatate a call to quasardb and initialize the local
+     * reader handle. If table strings are provided instead of qdb::table objects, will automatically
+     * look those up.
+     *
+     * May throw exception upon error.
+     *
+     * :NOTE(leon): We just return a reference to ourselves, but maybe we want the outer object to wrap
+     *              a subclass and return that as well. Not 100% sure if that's the best way to go. This
+     *              works right now and is the same approach that we take with e.g. qdb::cluster
+     */
+    reader const & enter();
+
+    void exit(pybind11::object type, pybind11::object value, pybind11::object traceback)
+    {
+        return close();
+    }
+
+    /**
+     * Clean up and close. Does not require all data to be actually read.
+     */
+    void close();
+
+    iterator begin() const
+    {
+        if (reader_ == nullptr) [[unlikely]]
+        {
+            throw qdb::uninitialized_exception{
+                "Reader not yet opened: please encapsulate calls to the reader in a `with` block, or "
+                "explicitly `open` and `close` the resource"};
+        }
+        return iterator{handle_, reader_, batch_size_, table_names_.size()};
+    }
+
+    iterator end() const noexcept
+    {
+        return iterator{};
+    }
+
+private:
+    qdb::logger logger_;
+    qdb::handle_ptr handle_;
+    qdb_reader_handle_t reader_;
+
+    std::vector<std::string> table_names_;
+    std::vector<std::string> column_names_;
+    std::size_t batch_size_;
+    std::vector<py::tuple> ranges_;
+};
+
+using reader_ptr = std::unique_ptr<reader>;
+
+void register_reader(py::module_ & m);
+
+} // namespace qdb
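Two behavioral notes fall out of this header: get_batch_size() returns 0 when everything is fetched in a single batch, and begin() throws an uninitialized_exception unless the reader has been opened first (via the `with` block or an explicit enter()). Because each iteration yields an ordinary dict of masked arrays, turning a batch into a DataFrame is plain numpy/pandas work; a small sketch, with the conversion policy (NaN/NaT fill values) chosen here for illustration rather than taken from the package's own pandas helpers:

    import numpy as np
    import pandas as pd

    def batch_to_frame(batch: dict) -> pd.DataFrame:
        """Convert one reader batch ('$timestamp' plus masked arrays) into a DataFrame."""
        idx = pd.DatetimeIndex(batch["$timestamp"])
        cols = {}
        for name, xs in batch.items():
            if name == "$timestamp":
                continue
            if np.issubdtype(xs.dtype, np.datetime64):
                cols[name] = np.ma.filled(xs, np.datetime64("NaT"))
            elif np.issubdtype(xs.dtype, np.number):
                # Promote to float so masked entries can become NaN.
                cols[name] = np.ma.filled(xs.astype("float64"), np.nan)
            else:
                # Strings/blobs: fall back to the masked array's default fill value.
                cols[name] = np.ma.filled(xs)
        return pd.DataFrame(cols, index=idx)

    # frames = [batch_to_frame(b) for b in reader]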
quasardb/table.cpp
CHANGED
@@ -2,9 +2,10 @@
 #include "dispatch.hpp"
 #include "metrics.hpp"
 #include "object_tracker.hpp"
-#include "
+#include "reader.hpp"
 #include "traits.hpp"
 #include "convert/point.hpp"
+#include <memory> // for make_unique
 
 namespace qdb
 {
@@ -100,42 +101,8 @@ void table::_cache_metadata() const
     {
         _ttl = std::chrono::milliseconds{metadata->ttl};
     }
-}
-
-py::object table::reader(
-    const std::vector<std::string> & columns, py::object ranges, bool dict_mode) const
-{
-    _handle->check_open();
-
-    auto ranges_ = qdb::convert_ranges(ranges);
-
-    std::vector<detail::column_info> c_columns;
 
-
-    {
-        // This is a kludge, because technically a table can have no columns, and we're
-        // abusing it as "no argument provided". It's a highly exceptional use case, and
-        // doesn't really have any implication in practice (we just look up twice), so it
-        // should be ok.
-        c_columns = list_columns();
-    }
-    else
-    {
-        c_columns.reserve(columns.size());
-        // This transformation can probably be optimized, but it's only invoked when constructing
-        // the reader so it's unlikely to be a performance bottleneck.
-        std::transform(std::cbegin(columns), std::cend(columns), std::back_inserter(c_columns),
-            [this](const auto & col) {
-                const auto & info = column_info_by_id(col);
-                return detail::column_info{info.type, col, info.symtable};
-            });
-    }
-
-    return (dict_mode == true
-               ? py::cast(qdb::table_reader<reader::ts_dict_row>(_handle, _alias, c_columns, ranges_),
-                     py::return_value_policy::move)
-               : py::cast(qdb::table_reader<reader::ts_fast_row>(_handle, _alias, c_columns, ranges_),
-                     py::return_value_policy::move));
+    _shard_size = std::chrono::milliseconds{metadata->shard_size};
 }
 
 qdb_uint_t table::erase_ranges(const std::string & column, py::object ranges)
@@ -286,4 +253,5 @@ std::pair<pybind11::array, masked_array> table::timestamp_get_ranges(
 
     return ret;
 }
+
 }; // namespace qdb