databonk 0.0.2 → 0.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,7 +1,33 @@
1
1
  import { DataFrame } from '../core/dataframe.js';
2
2
  export type JoinType = 'inner' | 'left' | 'right' | 'outer';
3
+ /**
4
+ * Join algorithm selection:
5
+ * - 'hash': Hash join - good for most cases, O(n+m) with good hash distribution
6
+ * - 'sort-merge': Sort-merge join - better cache locality for large sorted datasets
7
+ * - 'auto': Automatically select based on available indices
8
+ */
9
+ export type JoinAlgorithm = 'hash' | 'sort-merge' | 'auto';
10
+ /**
11
+ * Options for join operations.
12
+ */
13
+ export interface JoinOptions {
14
+ /** Column name suffixes for overlapping non-key columns. Default: ['_x', '_y'] */
15
+ suffixes?: [string, string];
16
+ /** Join algorithm to use. Default: 'auto' */
17
+ algorithm?: JoinAlgorithm;
18
+ /** Whether to use existing indices. Default: true */
19
+ useIndices?: boolean;
20
+ }
3
21
  export declare class Joiner {
4
- static join(left: DataFrame, right: DataFrame, on: string | string[], how?: JoinType, suffixes?: [string, string]): DataFrame;
22
+ static join(left: DataFrame, right: DataFrame, on: string | string[], how?: JoinType, options?: JoinOptions | [string, string]): DataFrame;
23
+ /**
24
+ * Select the best join algorithm based on available indices and data characteristics.
25
+ */
26
+ private static selectAlgorithm;
27
+ /**
28
+ * Get an existing hash index or build a new one.
29
+ */
30
+ private static getOrBuildHashIndex;
5
31
  private static validateJoinKeys;
6
32
  private static buildHashIndex;
7
33
  /**
@@ -12,11 +38,21 @@ export declare class Joiner {
12
38
  private static leftJoin;
13
39
  private static rightJoin;
14
40
  private static outerJoin;
41
+ /**
42
+ * Sort-merge join algorithm.
43
+ * Efficient when both sides have sorted indices - uses sequential access pattern
44
+ * which is more cache-friendly for large datasets.
45
+ */
46
+ private static sortMergeJoin;
47
+ /**
48
+ * Get sorted entries from an existing sorted index or build them.
49
+ */
50
+ private static getOrBuildSortedEntries;
15
51
  private static buildJoinedDataFrame;
16
52
  }
17
53
  declare module '../core/dataframe.js' {
18
54
  interface DataFrame {
19
- join(other: DataFrame, on: string | string[], how?: JoinType, suffixes?: [string, string]): DataFrame;
55
+ join(other: DataFrame, on: string | string[], how?: JoinType, options?: JoinOptions | [string, string]): DataFrame;
20
56
  }
21
57
  }
22
58
  //# sourceMappingURL=join.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"join.d.ts","sourceRoot":"","sources":["../../src/operations/join.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,SAAS,EAAa,MAAM,sBAAsB,CAAC;AAI5D,MAAM,MAAM,QAAQ,GAAG,OAAO,GAAG,MAAM,GAAG,OAAO,GAAG,OAAO,CAAC;AAE5D,qBAAa,MAAM;IACjB,MAAM,CAAC,IAAI,CACT,IAAI,EAAE,SAAS,EACf,KAAK,EAAE,SAAS,EAChB,EAAE,EAAE,MAAM,GAAG,MAAM,EAAE,EACrB,GAAG,GAAE,QAAkB,EACvB,QAAQ,GAAE,CAAC,MAAM,EAAE,MAAM,CAAgB,GACxC,SAAS;IAsBZ,OAAO,CAAC,MAAM,CAAC,gBAAgB;IAW/B,OAAO,CAAC,MAAM,CAAC,cAAc;IAqC7B;;OAEG;IACH,OAAO,CAAC,MAAM,CAAC,aAAa;IAa5B,OAAO,CAAC,MAAM,CAAC,SAAS;IAwCxB,OAAO,CAAC,MAAM,CAAC,QAAQ;IAuCvB,OAAO,CAAC,MAAM,CAAC,SAAS;IAuCxB,OAAO,CAAC,MAAM,CAAC,SAAS;IAoDxB,OAAO,CAAC,MAAM,CAAC,oBAAoB;CA+CpC;AAED,OAAO,QAAQ,sBAAsB,CAAC;IACpC,UAAU,SAAS;QACjB,IAAI,CACF,KAAK,EAAE,SAAS,EAChB,EAAE,EAAE,MAAM,GAAG,MAAM,EAAE,EACrB,GAAG,CAAC,EAAE,QAAQ,EACd,QAAQ,CAAC,EAAE,CAAC,MAAM,EAAE,MAAM,CAAC,GAC1B,SAAS,CAAC;KACd;CACF"}
1
+ {"version":3,"file":"join.d.ts","sourceRoot":"","sources":["../../src/operations/join.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,SAAS,EAAa,MAAM,sBAAsB,CAAC;AAK5D,MAAM,MAAM,QAAQ,GAAG,OAAO,GAAG,MAAM,GAAG,OAAO,GAAG,OAAO,CAAC;AAE5D;;;;;GAKG;AACH,MAAM,MAAM,aAAa,GAAG,MAAM,GAAG,YAAY,GAAG,MAAM,CAAC;AAE3D;;GAEG;AACH,MAAM,WAAW,WAAW;IAC1B,kFAAkF;IAClF,QAAQ,CAAC,EAAE,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IAC5B,6CAA6C;IAC7C,SAAS,CAAC,EAAE,aAAa,CAAC;IAC1B,qDAAqD;IACrD,UAAU,CAAC,EAAE,OAAO,CAAC;CACtB;AAED,qBAAa,MAAM;IACjB,MAAM,CAAC,IAAI,CACT,IAAI,EAAE,SAAS,EACf,KAAK,EAAE,SAAS,EAChB,EAAE,EAAE,MAAM,GAAG,MAAM,EAAE,EACrB,GAAG,GAAE,QAAkB,EACvB,OAAO,GAAE,WAAW,GAAG,CAAC,MAAM,EAAE,MAAM,CAAM,GAC3C,SAAS;IA4CZ;;OAEG;IACH,OAAO,CAAC,MAAM,CAAC,eAAe;IAkC9B;;OAEG;IACH,OAAO,CAAC,MAAM,CAAC,mBAAmB;IAkBlC,OAAO,CAAC,MAAM,CAAC,gBAAgB;IAW/B,OAAO,CAAC,MAAM,CAAC,cAAc;IAqC7B;;OAEG;IACH,OAAO,CAAC,MAAM,CAAC,aAAa;IAa5B,OAAO,CAAC,MAAM,CAAC,SAAS;IAwCxB,OAAO,CAAC,MAAM,CAAC,QAAQ;IAuCvB,OAAO,CAAC,MAAM,CAAC,SAAS;IAuCxB,OAAO,CAAC,MAAM,CAAC,SAAS;IAoDxB;;;;OAIG;IACH,OAAO,CAAC,MAAM,CAAC,aAAa;IAoF5B;;OAEG;IACH,OAAO,CAAC,MAAM,CAAC,uBAAuB;IA+BtC,OAAO,CAAC,MAAM,CAAC,oBAAoB;CAsDpC;AAED,OAAO,QAAQ,sBAAsB,CAAC;IACpC,UAAU,SAAS;QACjB,IAAI,CACF,KAAK,EAAE,SAAS,EAChB,EAAE,EAAE,MAAM,GAAG,MAAM,EAAE,EACrB,GAAG,CAAC,EAAE,QAAQ,EACd,OAAO,CAAC,EAAE,WAAW,GAAG,CAAC,MAAM,EAAE,MAAM,CAAC,GACvC,SAAS,CAAC;KACd;CACF"}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "databonk",
3
- "version": "0.0.2",
3
+ "version": "0.0.3",
4
4
  "type": "module",
5
5
  "description": "A lightweight, fast data frame library for JavaScript and TypeScript",
6
6
  "main": "dist/index.js",