duckdb 0.6.2-dev960.0 → 0.6.2-dev964.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -2,7 +2,7 @@
2
2
  "name": "duckdb",
3
3
  "main": "./lib/duckdb.js",
4
4
  "types": "./lib/duckdb.d.ts",
5
- "version": "0.6.2-dev960.0",
5
+ "version": "0.6.2-dev964.0",
6
6
  "description": "DuckDB node.js API",
7
7
  "gypfile": true,
8
8
  "dependencies": {
@@ -1,8 +1,8 @@
1
1
  #ifndef DUCKDB_VERSION
2
- #define DUCKDB_VERSION "0.6.2-dev960"
2
+ #define DUCKDB_VERSION "0.6.2-dev964"
3
3
  #endif
4
4
  #ifndef DUCKDB_SOURCE_ID
5
- #define DUCKDB_SOURCE_ID "84b9e770f3"
5
+ #define DUCKDB_SOURCE_ID "29c0bbc275"
6
6
  #endif
7
7
  #include "duckdb/function/table/system_functions.hpp"
8
8
  #include "duckdb/main/database.hpp"
@@ -454,21 +454,24 @@ static inline size_t compressBulk(SymbolTable &symbolTable, size_t nlines, size_
454
454
  #define FSST_SAMPLELINE ((size_t) 512)
455
455
 
456
456
  // quickly select a uniformly random set of lines such that we have between [FSST_SAMPLETARGET,FSST_SAMPLEMAXSZ) string bytes
457
- vector<u8*> makeSample(u8* sampleBuf, u8* strIn[], size_t **lenRef, size_t nlines) {
458
- size_t totSize = 0, *lenIn = *lenRef;
457
+ vector<u8*> makeSample(u8* sampleBuf, u8* strIn[], size_t *lenIn, size_t nlines,
458
+ unique_ptr<vector<size_t>>& sample_len_out) {
459
+ size_t totSize = 0;
459
460
  vector<u8*> sample;
460
461
 
461
462
  for(size_t i=0; i<nlines; i++)
462
463
  totSize += lenIn[i];
463
-
464
464
  if (totSize < FSST_SAMPLETARGET) {
465
465
  for(size_t i=0; i<nlines; i++)
466
466
  sample.push_back(strIn[i]);
467
467
  } else {
468
468
  size_t sampleRnd = FSST_HASH(4637947);
469
469
  u8* sampleLim = sampleBuf + FSST_SAMPLETARGET;
470
- size_t *sampleLen = *lenRef = new size_t[nlines + FSST_SAMPLEMAXSZ/FSST_SAMPLELINE];
471
470
 
471
+ sample_len_out = unique_ptr<vector<size_t>>(new vector<size_t>());
472
+ sample_len_out->reserve(nlines + FSST_SAMPLEMAXSZ/FSST_SAMPLELINE);
473
+
474
+ // This fails if we have a lot of small strings and a few big ones?
472
475
  while(sampleBuf < sampleLim) {
473
476
  // choose a non-empty line
474
477
  sampleRnd = FSST_HASH(sampleRnd);
@@ -485,7 +488,9 @@ vector<u8*> makeSample(u8* sampleBuf, u8* strIn[], size_t **lenRef, size_t nline
485
488
  size_t len = min(lenIn[linenr]-chunk,FSST_SAMPLELINE);
486
489
  memcpy(sampleBuf, strIn[linenr]+chunk, len);
487
490
  sample.push_back(sampleBuf);
488
- sampleBuf += *sampleLen++ = len;
491
+
492
+ sample_len_out->push_back(len);
493
+ sampleBuf += len;
489
494
  }
490
495
  }
491
496
  return sample;
@@ -493,11 +498,11 @@ vector<u8*> makeSample(u8* sampleBuf, u8* strIn[], size_t **lenRef, size_t nline
493
498
 
494
499
  extern "C" duckdb_fsst_encoder_t* duckdb_fsst_create(size_t n, size_t lenIn[], u8 *strIn[], int zeroTerminated) {
495
500
  u8* sampleBuf = new u8[FSST_SAMPLEMAXSZ];
496
- size_t *sampleLen = lenIn;
497
- vector<u8*> sample = makeSample(sampleBuf, strIn, &sampleLen, n?n:1); // careful handling of input to get a right-size and representative sample
501
+ unique_ptr<vector<size_t>> sample_sizes;
502
+ vector<u8*> sample = makeSample(sampleBuf, strIn, lenIn, n?n:1, sample_sizes); // careful handling of input to get a right-size and representative sample
498
503
  Encoder *encoder = new Encoder();
504
+ size_t* sampleLen = sample_sizes ? sample_sizes->data() : &lenIn[0];
499
505
  encoder->symbolTable = shared_ptr<SymbolTable>(buildSymbolTable(encoder->counters, sample, sampleLen, zeroTerminated));
500
- if (sampleLen != lenIn) delete[] sampleLen;
501
506
  delete[] sampleBuf;
502
507
  return (duckdb_fsst_encoder_t*) encoder;
503
508
  }