clickhouse-native 0.9.0 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48) hide show
  1. checksums.yaml +4 -4
  2. data/ext/clickhouse_native/client.cpp +42 -3
  3. data/ext/clickhouse_native/extconf.rb +30 -3
  4. data/ext/clickhouse_native/vendor/clickhouse-cpp/.github/workflows/bazel.yml +120 -0
  5. data/ext/clickhouse_native/vendor/clickhouse-cpp/.github/workflows/cross-repo-bug-relay.yml +17 -0
  6. data/ext/clickhouse_native/vendor/clickhouse-cpp/.github/workflows/linux.yml +22 -23
  7. data/ext/clickhouse_native/vendor/clickhouse-cpp/.github/workflows/macos.yml +22 -21
  8. data/ext/clickhouse_native/vendor/clickhouse-cpp/.github/workflows/windows_mingw.yml +29 -36
  9. data/ext/clickhouse_native/vendor/clickhouse-cpp/.github/workflows/windows_msvc.yml +29 -36
  10. data/ext/clickhouse_native/vendor/clickhouse-cpp/.gitignore +6 -0
  11. data/ext/clickhouse_native/vendor/clickhouse-cpp/AI_POLICY.md +13 -0
  12. data/ext/clickhouse_native/vendor/clickhouse-cpp/BUILD.bazel +167 -0
  13. data/ext/clickhouse_native/vendor/clickhouse-cpp/CMakeLists.txt +2 -1
  14. data/ext/clickhouse_native/vendor/clickhouse-cpp/MODULE.bazel +17 -0
  15. data/ext/clickhouse_native/vendor/clickhouse-cpp/MODULE.bazel.lock +503 -0
  16. data/ext/clickhouse_native/vendor/clickhouse-cpp/README.md +32 -6
  17. data/ext/clickhouse_native/vendor/clickhouse-cpp/ci/docker-compose/config.xml +53 -0
  18. data/ext/clickhouse_native/vendor/clickhouse-cpp/ci/docker-compose/users.xml +35 -0
  19. data/ext/clickhouse_native/vendor/clickhouse-cpp/ci/docker-compose.yml +22 -0
  20. data/ext/clickhouse_native/vendor/clickhouse-cpp/clickhouse/CMakeLists.txt +11 -0
  21. data/ext/clickhouse_native/vendor/clickhouse-cpp/clickhouse/base/sslsocket.cpp +24 -0
  22. data/ext/clickhouse_native/vendor/clickhouse-cpp/clickhouse/block.cpp +1 -1
  23. data/ext/clickhouse_native/vendor/clickhouse-cpp/clickhouse/block.h +2 -1
  24. data/ext/clickhouse_native/vendor/clickhouse-cpp/clickhouse/client.cpp +293 -136
  25. data/ext/clickhouse_native/vendor/clickhouse-cpp/clickhouse/client.h +31 -2
  26. data/ext/clickhouse_native/vendor/clickhouse-cpp/clickhouse/columns/array.cpp +12 -0
  27. data/ext/clickhouse_native/vendor/clickhouse-cpp/clickhouse/columns/array.h +17 -7
  28. data/ext/clickhouse_native/vendor/clickhouse-cpp/clickhouse/columns/bool.cpp +79 -0
  29. data/ext/clickhouse_native/vendor/clickhouse-cpp/clickhouse/columns/bool.h +62 -0
  30. data/ext/clickhouse_native/vendor/clickhouse-cpp/clickhouse/columns/factory.cpp +16 -0
  31. data/ext/clickhouse_native/vendor/clickhouse-cpp/clickhouse/columns/itemview.cpp +2 -0
  32. data/ext/clickhouse_native/vendor/clickhouse-cpp/clickhouse/columns/itemview.h +6 -2
  33. data/ext/clickhouse_native/vendor/clickhouse-cpp/clickhouse/columns/json.cpp +102 -0
  34. data/ext/clickhouse_native/vendor/clickhouse-cpp/clickhouse/columns/json.h +82 -0
  35. data/ext/clickhouse_native/vendor/clickhouse-cpp/clickhouse/columns/lowcardinality.cpp +2 -1
  36. data/ext/clickhouse_native/vendor/clickhouse-cpp/clickhouse/columns/string.cpp +7 -2
  37. data/ext/clickhouse_native/vendor/clickhouse-cpp/clickhouse/columns/tuple.cpp +48 -5
  38. data/ext/clickhouse_native/vendor/clickhouse-cpp/clickhouse/columns/tuple.h +14 -1
  39. data/ext/clickhouse_native/vendor/clickhouse-cpp/clickhouse/query.h +2 -2
  40. data/ext/clickhouse_native/vendor/clickhouse-cpp/clickhouse/server_exception.h +0 -3
  41. data/ext/clickhouse_native/vendor/clickhouse-cpp/clickhouse/types/type_parser.cpp +43 -0
  42. data/ext/clickhouse_native/vendor/clickhouse-cpp/clickhouse/types/type_parser.h +9 -0
  43. data/ext/clickhouse_native/vendor/clickhouse-cpp/clickhouse/types/types.cpp +61 -11
  44. data/ext/clickhouse_native/vendor/clickhouse-cpp/clickhouse/types/types.h +18 -2
  45. data/ext/clickhouse_native/vendor/clickhouse-cpp/clickhouse/version.h +1 -1
  46. data/lib/clickhouse_native/version.rb +1 -1
  47. data/lib/clickhouse_native.rb +1 -0
  48. metadata +14 -1
@@ -10,6 +10,7 @@
10
10
  #include "columns/geo.h"
11
11
  #include "columns/ip4.h"
12
12
  #include "columns/ip6.h"
13
+ #include "columns/json.h"
13
14
  #include "columns/lowcardinality.h"
14
15
  #include "columns/nothing.h"
15
16
  #include "columns/nullable.h"
@@ -19,6 +20,7 @@
19
20
  #include "columns/tuple.h"
20
21
  #include "columns/time.h"
21
22
  #include "columns/uuid.h"
23
+ #include "columns/bool.h"
22
24
 
23
25
  #include <chrono>
24
26
  #include <cstdint>
@@ -247,6 +249,9 @@ public:
247
249
  /// Intended for executing arbitrary queries.
248
250
  void Execute(const Query& query);
249
251
 
252
+ /// Alias for Execute.
253
+ void Select(const Query& query);
254
+
250
255
  /// Intended for executing select queries. Data will be returned with
251
256
  /// one or more calls of \p cb.
252
257
  void Select(const std::string& query, SelectCallback cb);
@@ -267,8 +272,30 @@ public:
267
272
  void SelectWithExternalDataCancelable(const std::string& query, const ExternalTables& external_tables, SelectCancelableCallback cb);
268
273
  void SelectWithExternalDataCancelable(const std::string& query, const std::string& query_id, const ExternalTables& external_tables, SelectCancelableCallback cb);
269
274
 
270
- /// Alias for Execute.
271
- void Select(const Query& query);
275
+ /// EXPERIMENTAL. Intended for executing arbitrary queries while reading the data interactively with
276
+ /// NextBlock().
277
+ void BeginExecute(const Query& query);
278
+
279
+ /// EXPERIMENTAL. Alias for BeginExecute.
280
+ void BeginSelect(const Query& query);
281
+
282
+ /// EXPERIMENTAL. Interactive version of select; data will be returned on subsequent calls
283
+ /// to NextBlock().
284
+ void BeginSelect(const char* query);
285
+ void BeginSelect(const std::string& query);
286
+ void BeginSelect(const std::string& query, const std::string& query_id);
287
+
288
+ /// Returns the next block in the dataset after using the BeginSelect family of
289
+ /// functions.
290
+ std::optional<Block> NextBlock();
291
+
292
+ // EXPERIMENTAL. Cancels current execution of BeginSelect and drains all in-flight data.
293
+ // Subsequent calls to NextBlock() after Cancel() will throw an exception.
294
+ void Cancel();
295
+
296
+ // EXPERIMENTAL. Returns true if the client is still in data-receiving mode and more future
297
+ // calls to NextBlock() are expected.
298
+ bool IsSelecting() const;
272
299
 
273
300
  /// Intended for inserting a block of data into the table \p table_name.
274
301
  void Insert(const std::string& table_name, const Block& block);
@@ -284,6 +311,8 @@ public:
284
311
  /// End an \p INSERT session started by \p BeginInsert.
285
312
  void EndInsert();
286
313
 
314
+ bool IsInserting() const;
315
+
287
316
  /// Ping server for aliveness.
288
317
  void Ping();
289
318
 
@@ -167,6 +167,18 @@ ColumnRef ColumnArray::GetData() {
167
167
  return data_;
168
168
  }
169
169
 
170
+ std::shared_ptr<const Column> ColumnArray::GetData() const {
171
+ return data_;
172
+ }
173
+
174
+ std::shared_ptr<ColumnUInt64>& ColumnArray::GetOffsets() {
175
+ return offsets_;
176
+ }
177
+
178
+ std::shared_ptr<const ColumnUInt64> ColumnArray::GetOffsets() const {
179
+ return offsets_;
180
+ }
181
+
170
182
  void ColumnArray::Reset() {
171
183
  data_.reset();
172
184
  offsets_.reset();
@@ -78,14 +78,26 @@ public:
78
78
 
79
79
  void OffsetsIncrease(size_t);
80
80
 
81
+ /// Gets the backing data array of the Array column. This does not include any array bounds.
82
+ ColumnRef GetData();
83
+ std::shared_ptr<const Column> GetData() const;
84
+
85
+ /// Gets all offsets denoting the list boundaries overlaid on GetData().
86
+ /// The layout is [size_i, ...] where `i` is the row.
87
+ std::shared_ptr<ColumnUInt64>& GetOffsets();
88
+ std::shared_ptr<const ColumnUInt64> GetOffsets() const;
89
+
90
+ /// Gets the offset of the start of row `n` into `GetData()`.
91
+ size_t GetOffset(size_t n) const;
92
+
93
+ /// Gets the element count of row `n`.
94
+ size_t GetSize(size_t n) const;
95
+
81
96
  protected:
82
97
  template<typename T> friend class ColumnArrayT;
83
98
 
84
99
  ColumnArray(ColumnArray&& array);
85
100
 
86
- size_t GetOffset(size_t n) const;
87
- size_t GetSize(size_t n) const;
88
- ColumnRef GetData();
89
101
  void AddOffset(size_t n);
90
102
  void Reset();
91
103
 
@@ -262,11 +274,9 @@ public:
262
274
  template <typename Container>
263
275
  inline void Append(Container&& container) {
264
276
  using container_type = decltype(container);
265
- if constexpr (std::is_lvalue_reference_v<container_type> ||
266
- std::is_const_v<std::remove_reference_t<container_type>>) {
277
+ if constexpr (std::is_lvalue_reference_v<container_type> || std::is_const_v<std::remove_reference_t<container_type>>) {
267
278
  Append(std::begin(container), std::end(container));
268
- }
269
- else {
279
+ } else {
270
280
  Append(std::make_move_iterator(std::begin(container)),
271
281
  std::make_move_iterator(std::end(container)));
272
282
  }
@@ -0,0 +1,79 @@
1
+ #include "bool.h"
2
+
3
+ #include "../types/types.h"
4
+
5
+ namespace clickhouse {
6
+
7
+ ColumnBool::ColumnBool()
8
+ : Column(Type::CreateSimple<bool>())
9
+ , data_()
10
+ {
11
+ }
12
+
13
+ ColumnBool::ColumnBool(std::vector<uint8_t> data)
14
+ : Column(Type::CreateSimple<bool>())
15
+ , data_(std::move(data))
16
+ {
17
+ }
18
+
19
+ void ColumnBool::Reserve(size_t new_cap) {
20
+ data_.Reserve(new_cap);
21
+ }
22
+
23
+ size_t ColumnBool::Capacity() const {
24
+ return data_.Capacity();
25
+ }
26
+
27
+ void ColumnBool::Append(bool value) {
28
+ data_.Append(static_cast<uint8_t>(value));
29
+ }
30
+
31
+ bool ColumnBool::At(size_t n) const {
32
+ return static_cast<bool>(data_.At(n));
33
+ }
34
+
35
+ void ColumnBool::Append(ColumnRef column) {
36
+ if (auto col = column->As<ColumnBool>()) {
37
+ auto& src = col->data_.GetWritableData();
38
+ data_.GetWritableData().insert(data_.GetWritableData().end(), src.begin(), src.end());
39
+ } else if (auto col = column->As<ColumnUInt8>()) {
40
+ auto& src = col->GetWritableData();
41
+ data_.GetWritableData().insert(data_.GetWritableData().end(), src.begin(), src.end());
42
+ }
43
+ }
44
+
45
+ bool ColumnBool::LoadBody(InputStream* input, size_t rows) {
46
+ return data_.LoadBody(input, rows);
47
+ }
48
+
49
+ void ColumnBool::SaveBody(OutputStream* output) {
50
+ data_.SaveBody(output);
51
+ }
52
+
53
+ void ColumnBool::Clear() {
54
+ data_.Clear();
55
+ }
56
+
57
+ size_t ColumnBool::Size() const {
58
+ return data_.Size();
59
+ }
60
+
61
+ ColumnRef ColumnBool::Slice(size_t begin, size_t len) const {
62
+ auto sliced = std::static_pointer_cast<ColumnUInt8>(data_.Slice(begin, len));
63
+ return std::make_shared<ColumnBool>(std::move(sliced->GetWritableData()));
64
+ }
65
+
66
+ ColumnRef ColumnBool::CloneEmpty() const {
67
+ return std::make_shared<ColumnBool>();
68
+ }
69
+
70
+ void ColumnBool::Swap(Column& other) {
71
+ auto& col = dynamic_cast<ColumnBool&>(other);
72
+ data_.Swap(col.data_);
73
+ }
74
+
75
+ ItemView ColumnBool::GetItem(size_t index) const {
76
+ return ItemView{Type::Bool, data_.At(index)};
77
+ }
78
+
79
+ } // namespace clickhouse
@@ -0,0 +1,62 @@
1
+ #pragma once
2
+
3
+ #include "column.h"
4
+ #include "numeric.h"
5
+
6
+ #include "../types/types.h"
7
+
8
+ #include <vector>
9
+
10
+ namespace clickhouse {
11
+
12
+ class ColumnBool : public Column {
13
+ public:
14
+ using ValueType = bool;
15
+
16
+ ColumnBool();
17
+ explicit ColumnBool(std::vector<uint8_t> data);
18
+
19
+ /// Increase the capacity of the column for large block insertion.
20
+ void Reserve(size_t new_cap) override;
21
+
22
+ /// Appends one element to the end of column.
23
+ void Append(bool value);
24
+
25
+ /// Returns element at given row number.
26
+ bool At(size_t n) const;
27
+
28
+ /// Returns element at given row number.
29
+ bool operator[](size_t n) const { return At(n); }
30
+
31
+ /// Returns the capacity of the column
32
+ size_t Capacity() const;
33
+
34
+ public:
35
+ /// Appends content of given column to the end of current one.
36
+ /// Accepts ColumnBool or ColumnUInt8.
37
+ void Append(ColumnRef column) override;
38
+
39
+ /// Loads column data from input stream.
40
+ bool LoadBody(InputStream* input, size_t rows) override;
41
+
42
+ /// Saves column data to output stream.
43
+ void SaveBody(OutputStream* output) override;
44
+
45
+ /// Clear column data.
46
+ void Clear() override;
47
+
48
+ /// Returns count of rows in the column.
49
+ size_t Size() const override;
50
+
51
+ /// Makes slice of the current column.
52
+ ColumnRef Slice(size_t begin, size_t len) const override;
53
+ ColumnRef CloneEmpty() const override;
54
+ void Swap(Column& other) override;
55
+
56
+ ItemView GetItem(size_t index) const override;
57
+
58
+ private:
59
+ ColumnUInt8 data_;
60
+ };
61
+
62
+ } // namespace clickhouse
@@ -7,11 +7,13 @@
7
7
  #include "geo.h"
8
8
  #include "ip4.h"
9
9
  #include "ip6.h"
10
+ #include "json.h"
10
11
  #include "lowcardinality.h"
11
12
  #include "lowcardinalityadaptor.h"
12
13
  #include "map.h"
13
14
  #include "nothing.h"
14
15
  #include "nullable.h"
16
+ #include "bool.h"
15
17
  #include "numeric.h"
16
18
  #include "string.h"
17
19
  #include "./time.h" // `./` avoids possible conflicts with standard C time.h
@@ -49,6 +51,8 @@ static ColumnRef CreateTerminalColumn(const TypeAst& ast) {
49
51
  case Type::Void:
50
52
  return std::make_shared<ColumnNothing>();
51
53
 
54
+ case Type::Bool:
55
+ return std::make_shared<ColumnBool>();
52
56
  case Type::UInt8:
53
57
  return std::make_shared<ColumnUInt8>();
54
58
  case Type::UInt16:
@@ -136,6 +140,8 @@ static ColumnRef CreateTerminalColumn(const TypeAst& ast) {
136
140
  return nullptr;
137
141
  }
138
142
  return std::make_shared<ColumnTime64>(GetASTChildElement(ast, 0).value);
143
+ case Type::JSON:
144
+ return std::make_shared<ColumnJSON>();
139
145
  default:
140
146
  return nullptr;
141
147
  }
@@ -162,16 +168,26 @@ static ColumnRef CreateColumnFromAst(const TypeAst& ast, CreateColumnByTypeSetti
162
168
 
163
169
  case TypeAst::Tuple: {
164
170
  std::vector<ColumnRef> columns;
171
+ std::vector<std::string> names;
165
172
 
166
173
  columns.reserve(ast.elements.size());
174
+ names.reserve(ast.elements.size());
175
+ bool any_named = false;
167
176
  for (const auto& elem : ast.elements) {
168
177
  if (auto col = CreateColumnFromAst(elem, settings)) {
169
178
  columns.push_back(col);
179
+ names.push_back(elem.element_name);
180
+ if (!elem.element_name.empty()) {
181
+ any_named = true;
182
+ }
170
183
  } else {
171
184
  return nullptr;
172
185
  }
173
186
  }
174
187
 
188
+ if (any_named) {
189
+ return std::make_shared<ColumnTuple>(columns, std::move(names));
190
+ }
175
191
  return std::make_shared<ColumnTuple>(columns);
176
192
  }
177
193
 
@@ -44,6 +44,7 @@ void ItemView::ValidateData(Type::Code type, DataType data) {
44
44
  case Type::Code::Int8:
45
45
  case Type::Code::UInt8:
46
46
  case Type::Code::Enum8:
47
+ case Type::Code::Bool:
47
48
  return AssertSize({1});
48
49
 
49
50
  case Type::Code::Int16:
@@ -72,6 +73,7 @@ void ItemView::ValidateData(Type::Code type, DataType data) {
72
73
 
73
74
  case Type::Code::String:
74
75
  case Type::Code::FixedString:
76
+ case Type::Code::JSON:
75
77
  // value can be of any size
76
78
  return;
77
79
 
@@ -28,7 +28,9 @@ private:
28
28
  inline auto ConvertToStorageValue(const T& t) {
29
29
  if constexpr (std::is_same_v<std::string_view, T> || std::is_same_v<std::string, T>) {
30
30
  return std::string_view{t};
31
- } else if constexpr (std::is_fundamental_v<T> || std::is_same_v<Int128, std::decay_t<T>> || std::is_same_v<UInt128, std::decay_t<T>>) {
31
+ } else if constexpr (std::is_fundamental_v<T>
32
+ || std::is_same_v<Int128, std::decay_t<T>>
33
+ || std::is_same_v<UInt128, std::decay_t<T>>) {
32
34
  return std::string_view{reinterpret_cast<const char*>(&t), sizeof(T)};
33
35
  } else {
34
36
  static_assert(!std::is_same_v<T, T>, "Unknown type, which can't be stored in ItemView");
@@ -65,7 +67,9 @@ public:
65
67
  using ValueType = std::remove_cv_t<std::decay_t<T>>;
66
68
  if constexpr (std::is_same_v<std::string_view, ValueType> || std::is_same_v<std::string, ValueType>) {
67
69
  return data;
68
- } else if constexpr (std::is_fundamental_v<ValueType> || std::is_same_v<Int128, ValueType> || std::is_same_v<UInt128, ValueType>) {
70
+ } else if constexpr (std::is_fundamental_v<ValueType>
71
+ || std::is_same_v<Int128, ValueType>
72
+ || std::is_same_v<UInt128, ValueType>) {
69
73
  if (sizeof(ValueType) == data.size()) {
70
74
  return *reinterpret_cast<const T*>(data.data());
71
75
  } else {
@@ -0,0 +1,102 @@
1
+ #include "json.h"
2
+ #include "../base/wire_format.h"
3
+
4
+ namespace clickhouse {
5
+
6
+ enum class JSONSerializationVersion : uint64_t {
7
+ // String is the only currently supported serialization of JSON.
8
+ // it should be enabled with output_format_native_write_json_as_string=1
9
+ String = 1,
10
+ };
11
+
12
+ ColumnJSON::ColumnJSON()
13
+ : Column(Type::CreateJSON())
14
+ , data_(std::make_shared<ColumnString>())
15
+ {}
16
+
17
+ ColumnJSON::ColumnJSON(std::vector<std::string> data)
18
+ : Column(Type::CreateJSON())
19
+ , data_(std::make_shared<ColumnString>(std::move(data)))
20
+ {}
21
+
22
+ void ColumnJSON::Append(std::string_view str) {
23
+ data_->Append(str);
24
+ }
25
+
26
+ void ColumnJSON::Append(const char* str) {
27
+ data_->Append(str);
28
+ }
29
+ void ColumnJSON::Append(std::string&& str) {
30
+ data_->Append(std::move(str));
31
+ }
32
+
33
+ std::string_view ColumnJSON::At(size_t n) const {
34
+ return data_->At(n);
35
+ }
36
+
37
+ void ColumnJSON::Append(ColumnRef column) {
38
+ if (auto col = column->As<ColumnJSON>()) {
39
+ data_->Append(col->data_);
40
+ }
41
+ }
42
+
43
+ void ColumnJSON::Reserve(size_t new_cap) {
44
+ data_->Reserve(new_cap);
45
+ }
46
+
47
+ bool ColumnJSON::LoadPrefix(InputStream* input, size_t) {
48
+ uint64_t v;
49
+ if (!WireFormat::ReadFixed(*input, &v)) {
50
+ return false;
51
+ }
52
+ if (v != static_cast<uint64_t>(JSONSerializationVersion::String)) {
53
+ // Hard stop: the library can only parse JSON when `output_format_native_write_json_as_string` is enabled.
54
+ // Further processing is meaningless after this error and the user must be notified immediately.
55
+ throw ProtocolError("Unsupported JSON serialization version. "
56
+ "Make sure output_format_native_write_json_as_string=1 is set.");
57
+ }
58
+ return true;
59
+ }
60
+
61
+ bool ColumnJSON::LoadBody(InputStream* input, size_t rows) {
62
+ return data_->LoadBody(input, rows);
63
+ }
64
+
65
+ void ColumnJSON::SavePrefix(OutputStream* output) {
66
+ WireFormat::WriteFixed(*output, static_cast<uint64_t>(JSONSerializationVersion::String));
67
+ }
68
+
69
+ void ColumnJSON::SaveBody(OutputStream* output) {
70
+ data_->SaveBody(output);
71
+ }
72
+
73
+ void ColumnJSON::Clear() {
74
+ data_->Clear();
75
+ }
76
+
77
+ size_t ColumnJSON::Size() const {
78
+ return data_->Size();
79
+ }
80
+
81
+ ColumnRef ColumnJSON::Slice(size_t begin, size_t len) const {
82
+ auto ret = std::make_shared<ColumnJSON>();
83
+ auto sliced_data = data_->Slice(begin, len)->As<ColumnString>();
84
+ ret->data_->Swap(*sliced_data);
85
+ return ret;
86
+ }
87
+
88
+ ColumnRef ColumnJSON::CloneEmpty() const
89
+ {
90
+ return std::make_shared<ColumnJSON>();
91
+ }
92
+
93
+ void ColumnJSON::Swap(Column& other) {
94
+ auto & col = dynamic_cast<ColumnJSON &>(other);
95
+ data_.swap(col.data_);
96
+ }
97
+
98
+ ItemView ColumnJSON::GetItem(size_t index) const {
99
+ return ItemView{Type::JSON, data_->GetItem(index)};
100
+ }
101
+
102
+ }
@@ -0,0 +1,82 @@
1
+ #pragma once
2
+
3
+ #include "column.h"
4
+ #include "string.h"
5
+ #include "nullable.h"
6
+
7
+ namespace clickhouse {
8
+
9
+ /**
10
+ * JSON Column: Represents JSON values as strings.
11
+ * Works only when ClickHouse outputs JSON as strings and requires the setting
12
+ * output_format_native_write_json_as_string to be set to 1 for selecting data.
13
+ * Inserting JSON data does not require this setting.
14
+ *
15
+ * WARNING: THIS IS AN EXPERIMENTAL IMPLEMENTATION.
16
+ * The API may change in the future as we continue working on full support for JSON columns.
17
+ *
18
+ * ClickHouse does not accept empty strings as JSON; it requires an empty object ({}).
19
+ * For nullable columns, each row marked as NULL must contain {}.
20
+ * For convenience `clickhouse::ColumnNullableT<ColumnJSON>` automatically inserts {} for NULL rows.
21
+ */
22
+ class ColumnJSON : public Column {
23
+ public:
24
+
25
+ ColumnJSON();
26
+ explicit ColumnJSON(std::vector<std::string> data);
27
+
28
+ /// Appends one element to the column.
29
+ void Append(std::string_view str);
30
+
31
+ void Append(const char* str);
32
+ void Append(std::string&& str);
33
+
34
+ std::string_view At(size_t n) const;
35
+ inline std::string_view operator [] (size_t n) const { return At(n); }
36
+
37
+ /// Appends content of given column to the end of current one.
38
+ void Append(ColumnRef column) override;
39
+
40
+ /// Increase the capacity of the column for large block insertion.
41
+ void Reserve(size_t new_cap) override;
42
+
43
+ /// Loads column prefix from input stream.
44
+ bool LoadPrefix(InputStream* input, size_t rows) override;
45
+
46
+ /// Loads column data from input stream.
47
+ bool LoadBody(InputStream* input, size_t rows) override;
48
+
49
+ /// Saves column prefix to output stream. Column types with prefixes must implement it.
50
+ void SavePrefix(OutputStream* output) override;
51
+
52
+ /// Saves column data to output stream.
53
+ void SaveBody(OutputStream* output) override;
54
+
55
+ /// Clear column data.
56
+ void Clear() override;
57
+
58
+ /// Returns count of rows in the column.
59
+ size_t Size() const override;
60
+
61
+ /// Makes slice of the current column.
62
+ ColumnRef Slice(size_t begin, size_t len) const override;
63
+ ColumnRef CloneEmpty() const override;
64
+ void Swap(Column& other) override;
65
+
66
+ ItemView GetItem(size_t index) const override;
67
+
68
+ private:
69
+ std::shared_ptr<ColumnString> data_;
70
+ };
71
+
72
+ template <>
73
+ inline void ColumnNullableT<ColumnJSON>::Append(std::optional<std::string_view> value) {
74
+ ColumnNullable::Append(!value.has_value());
75
+ if (value.has_value()) {
76
+ typed_nested_data_->Append(*value);
77
+ } else {
78
+ typed_nested_data_->Append(std::string_view("{}"));
79
+ }
80
+ }
81
+
82
+ }
@@ -396,7 +396,8 @@ void ColumnLowCardinality::SavePrefix(OutputStream* output) {
396
396
  }
397
397
 
398
398
  void ColumnLowCardinality::SaveBody(OutputStream* output) {
399
- const uint64_t index_serialization_type = indexTypeFromIndexColumn(*index_column_) | IndexFlag::HasAdditionalKeysBit;
399
+ const uint64_t index_serialization_type =
400
+ static_cast<uint64_t>(indexTypeFromIndexColumn(*index_column_)) | IndexFlag::HasAdditionalKeysBit;
400
401
  WireFormat::WriteFixed(*output, index_serialization_type);
401
402
 
402
403
  const uint64_t number_of_keys = dictionary_column_->Size();
@@ -136,8 +136,13 @@ struct ColumnString::Block
136
136
  std::string_view AppendUnsafe(std::string_view str) {
137
137
  const auto pos = &data_[size];
138
138
 
139
- memcpy(pos, str.data(), str.size());
140
- size += str.size();
139
+ // memcpy's source pointer is declared nonnull regardless of the
140
+ // size argument, so an empty string_view backed by std::string()
141
+ // (where data() may be null) trips UBSan on every empty append.
142
+ if (str.size() > 0) {
143
+ memcpy(pos, str.data(), str.size());
144
+ size += str.size();
145
+ }
141
146
 
142
147
  return std::string_view(pos, str.size());
143
148
  }
@@ -1,8 +1,9 @@
1
1
  #include "tuple.h"
2
2
 
3
3
  namespace clickhouse {
4
+ namespace {
4
5
 
5
- static std::vector<TypeRef> CollectTypes(const std::vector<ColumnRef>& columns) {
6
+ std::vector<TypeRef> CollectTypes(const std::vector<ColumnRef>& columns) {
6
7
  std::vector<TypeRef> types;
7
8
  for (const auto& col : columns) {
8
9
  types.push_back(col->Type());
@@ -10,12 +11,45 @@ static std::vector<TypeRef> CollectTypes(const std::vector<ColumnRef>& columns)
10
11
  return types;
11
12
  }
12
13
 
14
+ /// Tuple types can be appended if they have the same shape.
15
+ bool CanAppendType(const TypeRef& destination_type, const TypeRef& source_type) {
16
+ if (destination_type->GetCode() != Type::Tuple || source_type->GetCode() != Type::Tuple) {
17
+ return destination_type->IsEqual(source_type);
18
+ }
19
+
20
+ const auto* destination_tuple = destination_type->As<TupleType>();
21
+ const auto* source_tuple = source_type->As<TupleType>();
22
+
23
+ const auto destination_item_types = destination_tuple->GetTupleType();
24
+ const auto source_item_types = source_tuple->GetTupleType();
25
+ if (destination_item_types.size() != source_item_types.size()) {
26
+ return false;
27
+ }
28
+
29
+ for (size_t i = 0; i < destination_item_types.size(); ++i) {
30
+ if (!CanAppendType(destination_item_types[i], source_item_types[i])) {
31
+ return false;
32
+ }
33
+ }
34
+
35
+ return true;
36
+ }
37
+
38
+ }
39
+
13
40
  ColumnTuple::ColumnTuple(const std::vector<ColumnRef>& columns)
14
41
  : Column(Type::CreateTuple(CollectTypes(columns)))
15
42
  , columns_(columns)
16
43
  {
17
44
  }
18
45
 
46
+ ColumnTuple::ColumnTuple(const std::vector<ColumnRef>& columns,
47
+ std::vector<std::string> names)
48
+ : Column(Type::CreateTuple(CollectTypes(columns), std::move(names)))
49
+ , columns_(columns)
50
+ {
51
+ }
52
+
19
53
  size_t ColumnTuple::TupleSize() const {
20
54
  return columns_.size();
21
55
  }
@@ -23,11 +57,11 @@ size_t ColumnTuple::TupleSize() const {
23
57
  void ColumnTuple::Reserve(size_t new_cap) {
24
58
  for (auto& column : columns_) {
25
59
  column->Reserve(new_cap);
26
- }
60
+ }
27
61
  }
28
62
 
29
63
  void ColumnTuple::Append(ColumnRef column) {
30
- if (!this->Type()->IsEqual(column->Type())) {
64
+ if (!CanAppendType(this->Type(), column->Type())) {
31
65
  throw ValidationError(
32
66
  "can't append column of type " + column->Type()->GetName() + " "
33
67
  "to column type " + this->Type()->GetName());
@@ -37,6 +71,7 @@ void ColumnTuple::Append(ColumnRef column) {
37
71
  columns_[ci]->Append((*source_tuple_column)[ci]);
38
72
  }
39
73
  }
74
+
40
75
  size_t ColumnTuple::Size() const {
41
76
  return columns_.empty() ? 0 : columns_[0]->Size();
42
77
  }
@@ -48,7 +83,11 @@ ColumnRef ColumnTuple::Slice(size_t begin, size_t len) const {
48
83
  sliced_columns.push_back(column->Slice(begin, len));
49
84
  }
50
85
 
51
- return std::make_shared<ColumnTuple>(sliced_columns);
86
+ const auto& names = this->Type()->As<TupleType>()->GetItemNames();
87
+ if (names.empty()) {
88
+ return std::make_shared<ColumnTuple>(sliced_columns);
89
+ }
90
+ return std::make_shared<ColumnTuple>(sliced_columns, names);
52
91
  }
53
92
 
54
93
  ColumnRef ColumnTuple::CloneEmpty() const {
@@ -59,7 +98,11 @@ ColumnRef ColumnTuple::CloneEmpty() const {
59
98
  result_columns.push_back(column->CloneEmpty());
60
99
  }
61
100
 
62
- return std::make_shared<ColumnTuple>(result_columns);
101
+ const auto& names = this->Type()->As<TupleType>()->GetItemNames();
102
+ if (names.empty()) {
103
+ return std::make_shared<ColumnTuple>(result_columns);
104
+ }
105
+ return std::make_shared<ColumnTuple>(result_columns, names);
63
106
  }
64
107
 
65
108
  bool ColumnTuple::LoadPrefix(InputStream* input, size_t rows) {