duckdb 0.6.2-dev960.0 → 0.6.2-dev969.0

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -2,7 +2,7 @@
2
2
  "name": "duckdb",
3
3
  "main": "./lib/duckdb.js",
4
4
  "types": "./lib/duckdb.d.ts",
5
- "version": "0.6.2-dev960.0",
5
+ "version": "0.6.2-dev969.0",
6
6
  "description": "DuckDB node.js API",
7
7
  "gypfile": true,
8
8
  "dependencies": {
@@ -1,8 +1,8 @@
1
1
  #ifndef DUCKDB_VERSION
2
- #define DUCKDB_VERSION "0.6.2-dev960"
2
+ #define DUCKDB_VERSION "0.6.2-dev969"
3
3
  #endif
4
4
  #ifndef DUCKDB_SOURCE_ID
5
- #define DUCKDB_SOURCE_ID "84b9e770f3"
5
+ #define DUCKDB_SOURCE_ID "8fa1b6e786"
6
6
  #endif
7
7
  #include "duckdb/function/table/system_functions.hpp"
8
8
  #include "duckdb/main/database.hpp"
@@ -129,19 +129,9 @@ void StructColumnData::Append(BaseStatistics &stats, ColumnAppendState &state, V
129
129
  // append the null values
130
130
  validity.Append(*stats.validity_stats, state.child_appends[0], vector, count);
131
131
 
132
- auto &struct_validity = FlatVector::Validity(vector);
133
-
134
132
  auto &struct_stats = (StructStatistics &)stats;
135
133
  auto &child_entries = StructVector::GetEntries(vector);
136
134
  for (idx_t i = 0; i < child_entries.size(); i++) {
137
- if (!struct_validity.AllValid()) {
138
- // we set the child entries of the struct to NULL
139
- // for any values in which the struct itself is NULL
140
- child_entries[i]->Flatten(count);
141
-
142
- auto &child_validity = FlatVector::Validity(*child_entries[i]);
143
- child_validity.Combine(struct_validity, count);
144
- }
145
135
  sub_columns[i]->Append(*struct_stats.child_stats[i], state.child_appends[i + 1], *child_entries[i], count);
146
136
  }
147
137
  }
@@ -454,21 +454,24 @@ static inline size_t compressBulk(SymbolTable &symbolTable, size_t nlines, size_
454
454
  #define FSST_SAMPLELINE ((size_t) 512)
455
455
 
456
456
  // quickly select a uniformly random set of lines such that we have between [FSST_SAMPLETARGET,FSST_SAMPLEMAXSZ) string bytes
457
- vector<u8*> makeSample(u8* sampleBuf, u8* strIn[], size_t **lenRef, size_t nlines) {
458
- size_t totSize = 0, *lenIn = *lenRef;
457
+ vector<u8*> makeSample(u8* sampleBuf, u8* strIn[], size_t *lenIn, size_t nlines,
458
+ unique_ptr<vector<size_t>>& sample_len_out) {
459
+ size_t totSize = 0;
459
460
  vector<u8*> sample;
460
461
 
461
462
  for(size_t i=0; i<nlines; i++)
462
463
  totSize += lenIn[i];
463
-
464
464
  if (totSize < FSST_SAMPLETARGET) {
465
465
  for(size_t i=0; i<nlines; i++)
466
466
  sample.push_back(strIn[i]);
467
467
  } else {
468
468
  size_t sampleRnd = FSST_HASH(4637947);
469
469
  u8* sampleLim = sampleBuf + FSST_SAMPLETARGET;
470
- size_t *sampleLen = *lenRef = new size_t[nlines + FSST_SAMPLEMAXSZ/FSST_SAMPLELINE];
471
470
 
471
+ sample_len_out = unique_ptr<vector<size_t>>(new vector<size_t>());
472
+ sample_len_out->reserve(nlines + FSST_SAMPLEMAXSZ/FSST_SAMPLELINE);
473
+
474
+ // This fails if we have a lot of small strings and a few big ones?
472
475
  while(sampleBuf < sampleLim) {
473
476
  // choose a non-empty line
474
477
  sampleRnd = FSST_HASH(sampleRnd);
@@ -485,7 +488,9 @@ vector<u8*> makeSample(u8* sampleBuf, u8* strIn[], size_t **lenRef, size_t nline
485
488
  size_t len = min(lenIn[linenr]-chunk,FSST_SAMPLELINE);
486
489
  memcpy(sampleBuf, strIn[linenr]+chunk, len);
487
490
  sample.push_back(sampleBuf);
488
- sampleBuf += *sampleLen++ = len;
491
+
492
+ sample_len_out->push_back(len);
493
+ sampleBuf += len;
489
494
  }
490
495
  }
491
496
  return sample;
@@ -493,11 +498,11 @@ vector<u8*> makeSample(u8* sampleBuf, u8* strIn[], size_t **lenRef, size_t nline
493
498
 
494
499
  extern "C" duckdb_fsst_encoder_t* duckdb_fsst_create(size_t n, size_t lenIn[], u8 *strIn[], int zeroTerminated) {
495
500
  u8* sampleBuf = new u8[FSST_SAMPLEMAXSZ];
496
- size_t *sampleLen = lenIn;
497
- vector<u8*> sample = makeSample(sampleBuf, strIn, &sampleLen, n?n:1); // careful handling of input to get a right-size and representative sample
501
+ unique_ptr<vector<size_t>> sample_sizes;
502
+ vector<u8*> sample = makeSample(sampleBuf, strIn, lenIn, n?n:1, sample_sizes); // careful handling of input to get a right-size and representative sample
498
503
  Encoder *encoder = new Encoder();
504
+ size_t* sampleLen = sample_sizes ? sample_sizes->data() : &lenIn[0];
499
505
  encoder->symbolTable = shared_ptr<SymbolTable>(buildSymbolTable(encoder->counters, sample, sampleLen, zeroTerminated));
500
- if (sampleLen != lenIn) delete[] sampleLen;
501
506
  delete[] sampleBuf;
502
507
  return (duckdb_fsst_encoder_t*) encoder;
503
508
  }